HomeAboutCodePastes
summaryrefslogtreecommitdiff
path: root/espeak.scm
blob: 6689f5f83ec14ee3b9abd0610c57b465efd07b7b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
(module espeak (say
                reset-defaults!
                make-voice
                voice?
                voice-name
                voice-name-set!
                voice-language
                voice-language-set!
                voice-identifier
                voice-identifier-set!
                voice-gender
                voice-gender-set!
                voice-age
                voice-age-set!
                voice-variant
                voice-variant-set!
                gender/none
                gender/male
                gender/female
                output/playback
                output/retrieval
                output/synchronous
                output/synch-playback
                initialize
                pos/char
                pos/word
                pos/sentence
                synth
                synth-mark
                key
                char
                param/rate
                param/volume
                param/pitch
                param/range
                param/punctuation
                param/capitals
                param/wordgap
                punct/none
                punct/all
                punct/some
                capitals/none
                capitals/sound-icon
                capitals/spelling
                set-parameter!
                get-parameter
                set-punctuation-list!
                text->phonemes
                list-voices
                set-voice-by-file!
                set-voice-by-name!
                set-voice-by-properties!
                get-current-voice
                cancel
                playing?
                synchronize
                terminate
                info)
  (import scheme
          utf8
          chicken.foreign
          (only chicken.base
                void define-record-type getter-with-setter add1 when
                error)
          (only chicken.bitwise arithmetic-shift)
          (only chicken.condition
                signal
                make-composite-condition
                make-property-condition)
          (only chicken.bitwise bitwise-xor)
          (only foreigners define-foreign-enum-type)
          (only srfi-18 thread-start!))

  (foreign-declare "#include <espeak-ng/speak_lib.h>")

  (define %initialized #f)

  (define-record-type voice
    (ptr->voice ptr)
    voice?
    (ptr voice->ptr))

  (define-foreign-type espeak-voice
    (c-pointer "espeak_VOICE")
    voice->ptr
    ptr->voice)

  (define (make-voice #!key name language identifier
                             (gender 0) (age 0) (variant 0))
    ((foreign-lambda* espeak-voice ((c-string name) (c-string language) (c-string identifier)
                                    (unsigned-byte gender) (unsigned-byte age) (unsigned-byte variant))
       "espeak_VOICE voice = {0};"
       "voice.name = name;"
       "voice.languages = language;"
       "voice.identifier = identifier;"
       "voice.gender = gender;"
       "voice.age = age;"
       "voice.variant = variant;"
       "C_return(&voice);")
     name language identifier gender age variant))


  (define (%voice-name voice)
    ((foreign-lambda* c-string ((espeak-voice voice))
       "C_return(voice->name);")
     voice))
  (define (voice-name-set! voice name)
    ((foreign-lambda* void ((espeak-voice voice) (c-string name))
       "voice->name = name;")
     voice name))
  (define voice-name
    (getter-with-setter %voice-name voice-name-set!))

  (define (%voice-language voice)
    ((foreign-lambda* c-string ((espeak-voice voice))
       "C_return(voice->languages);")
     voice))
  (define (voice-language-set! voice language)
    ((foreign-lambda* void ((espeak-voice voice) (c-string language))
       "voice->languages = language;")
     voice language))
  (define voice-language
    (getter-with-setter %voice-language voice-language-set!))

  (define (%voice-identifier voice)
    ((foreign-lambda* c-string ((espeak-voice voice))
       "C_return(voice->identifier);")
     voice))
  (define (voice-identifier-set! voice identifier)
    ((foreign-lambda* void ((espeak-voice voice) (c-string identifier))
       "voice->identifier = identifier;")
     voice identifier))
  (define voice-identifier
    (getter-with-setter %voice-identifier voice-identifier-set!))

  (define (%voice-gender voice)
    ((foreign-lambda* unsigned-byte ((espeak-voice voice))
       "C_return(voice->gender);")
     voice))
  (define (voice-gender-set! voice gender)
    ((foreign-lambda* void ((espeak-voice voice) (unsigned-byte gender))
       "voice->gender = gender;")
     voice gender))
  (define voice-gender
    (getter-with-setter %voice-gender voice-gender-set!))

  (define (%voice-age voice)
    ((foreign-lambda* unsigned-byte ((espeak-voice voice))
       "C_return(voice->age);")
     voice))
  (define (voice-age-set! voice age)
    ((foreign-lambda* void ((espeak-voice voice) (unsigned-byte age))
       "voice->age = age;")
     voice age))
  (define voice-age
    (getter-with-setter %voice-age voice-age-set!))

  (define (%voice-variant voice)
    ((foreign-lambda* unsigned-byte ((espeak-voice voice))
       "C_return(voice->variant);")
     voice))
  (define (voice-variant-set! voice variant)
    ((foreign-lambda* void ((espeak-voice voice) (unsigned-byte variant))
       "voice->variant = variant;")
     voice variant))
  (define voice-variant
    (getter-with-setter %voice-variant voice-variant-set!))

  (define gender/none 0)
  (define gender/male 1)
  (define gender/female 2)

  (define-foreign-enum-type (output int)
    (output->int int->output)
    ((output/playback AUDIO_OUTPUT_PLAYBACK) AUDIO_OUTPUT_PLAYBACK)
    ((output/retrieval AUDIO_OUTPUT_RETRIEVAL) AUDIO_OUTPUT_RETRIEVAL)
    ((output/synchronous AUDIO_OUTPUT_SYNCHRONOUS) AUDIO_OUTPUT_SYNCHRONOUS)
    ((output/synch-playback AUDIO_OUTPUT_SYNCH_PLAYBACK) AUDIO_OUTPUT_SYNCH_PLAYBACK))

  (define output/playback AUDIO_OUTPUT_PLAYBACK)
  (define output/retrieval AUDIO_OUTPUT_RETRIEVAL)
  (define output/synchronous AUDIO_OUTPUT_SYNCHRONOUS)
  (define output/synch-playback AUDIO_OUTPUT_SYNCH_PLAYBACK)

  (define (espeak-error code loc)
    (cond ((= code (foreign-value EE_INTERNAL_ERROR int))
           (signal
            (make-composite-condition
             (make-property-condition 'exn 'location loc 'message "espeak internal error")
             (make-property-condition 'espeak)
             (make-property-condition 'internal-error))))
          ((= code (foreign-value EE_BUFFER_FULL int))
           (signal
            (make-composite-condition
             (make-property-condition 'exn 'location loc 'message "espeak buffer full")
             (make-property-condition 'espeak)
             (make-property-condition 'buffer-full))))
          ((= code (foreign-value EE_NOT_FOUND int))
           (signal
            (make-composite-condition
             (make-property-condition 'exn 'location loc 'message "espeak not found error")
             (make-property-condition 'espeak)
             (make-property-condition 'not-found))))
          (else (void))))

  ;; Should only be called once per program - can otherwise cause errors. I'm
  ;; not exactly sure how this works, only that if I initialize something more
  ;; than once audio either doesn't play or I get an error that says 'error:
  ;; entity already exists' and I have NO idea where that comes from.
  (define (initialize #!key
                      (output output/playback)
                      (buflength 0)
                      (path #f)
                      (phoneme-events #f)
                      (phoneme-ipa #f)
                      (dont-exit #f))

    (if %initialized
        (void)
        (let* ((options
                (bitwise-xor
                 (if phoneme-events
                     (foreign-value espeakINITIALIZE_PHONEME_EVENTS int)
                     0)
                 (if phoneme-ipa
                     (foreign-value espeakINITIALIZE_PHONEME_IPA int)
                     0)
                 (if dont-exit
                     (foreign-value espeakINITIALIZE_DONT_EXIT int)
                     0)))
               (return
                ((foreign-lambda int "espeak_Initialize" int int c-string int)
                 output buflength path options)))
          ;; (print "hey")
          (if (= (foreign-value EE_INTERNAL_ERROR int) return)
              (espeak-error return 'initialize)
              (begin
                (set! %initialized #t)
                return)))))

  (define-foreign-enum-type (position int)
    (position->int int->position)
    ((pos/char POS_CHARACTER) POS_CHARACTER)
    ((pos/word POS_WORD) POS_WORD)
    ((pos/sentence POS_SENTENCE) POS_SENTENCE))

  (define pos/char POS_CHARACTER)
  (define pos/word POS_WORD)
  (define pos/sentence POS_SENTENCE)

  (define espeakSSML (foreign-value espeakSSML int))
  (define espeakPHONEMES (foreign-value espeakPHONEMES int))
  (define espeakENDPAUSE (foreign-value espeakENDPAUSE int))

  ;; position/char and position-sentence seem broken?
  (define (synth text #!key
                        (position 0)
                        (position-type pos/char)
                        (end-position 0)
                        (ssml #f)
                        (phonemes #f)
                        (endpause #f))
    (initialize)
    (let* ((end-position (or end-position 0))
           (flags (bitwise-xor
                   (if ssml espeakSSML 0)
                   (if phonemes espeakPHONEMES 0)
                   (if endpause espeakENDPAUSE 0))))
      (espeak-error
       ((foreign-lambda int "espeak_Synth"
          c-string size_t unsigned-int int unsigned-int unsigned-int
          c-pointer c-pointer)
        text (add1 (string-length text)) position position-type end-position flags #f #f)
       'synth)))

  ;; Not exactly sure how this one works?
  (define (synth-mark text
                      index-mark
                      #!key
                      (end-position 0)
                      (ssml #f)
                      (phonemes #f)
                      (endpause #f))
    (initialize)
    (let ((flags (bitwise-xor
                  (if ssml espeakSSML 0)
                  (if phonemes espeakPHONEMES 0)
                  (if endpause espeakENDPAUSE 0))))
      (espeak-error
       ((foreign-lambda int "espeak_Synth_Mark"
          c-string size_t c-string unsigned-int unsigned-int
          c-pointer c-pointer)
        text (add1 (string-length text)) index-mark end-position flags #f #f)
       'synth)))

  (define (key key-name)
    (initialize)
    (espeak-error
     ((foreign-lambda int "espeak_Key" c-string)
      key-name)
     'key))

  ;; Seems broken on longer char names? Repeats twice for some.
  (define (char char)
    (initialize)
    (espeak-error
     ((foreign-lambda int "espeak_Char" unsigned-int)
      (char->integer char))
     'key))

  (define-foreign-enum-type (param int)
    (param->int int->param)
    ((param/rate espeakRATE) espeakRATE)
    ((param/volume espeakVOLUME) espeakVOLUME)
    ((param/pitch espeakPITCH) espeakPITCH)
    ((param/range espeakRANGE) espeakRANGE)
    ((param/punctuation espeakPUNCTUATION) espeakPUNCTUATION)
    ((param/capitals espeakCAPITALS) espeakCAPITALS)
    ((param/wordgap espeakWORDGAP) espeakWORDGAP))

  (define-foreign-enum-type (punct int)
    (punct->int int->punct)
    ((punct/none espeakPUNCT_NONE) espeakPUNCT_NONE)
    ((punct/all espeakPUNCT_ALL) espeakPUNCT_ALL)
    ((punct/some espeakPUNCT_SOME) espeakPUNCT_SOME))

  (define punct/none espeakPUNCT_NONE)
  (define punct/all espeakPUNCT_ALL)
  (define punct/some espeakPUNCT_SOME)

  (define capitals/none 0)
  (define capitals/sound-icon 1)
  (define capitals/spelling 2)

  (define param/rate        espeakRATE)
  (define param/volume      espeakVOLUME)
  (define param/pitch       espeakPITCH)
  (define param/range       espeakRANGE)
  (define param/punctuation espeakPUNCTUATION)
  (define param/capitals    espeakCAPITALS)
  (define param/wordgap     espeakWORDGAP)

  (define (set-parameter! parameter value #!optional relative)
    (espeak-error
     ((foreign-lambda int "espeak_SetParameter" int int int)
      parameter value (if relative 1 0))
     'set-parameter!))

  (define (get-parameter param #!optional default)
    (initialize)
    ((foreign-lambda int "espeak_GetParameter" int int)
     param
     (if default 0 1)))

  (define (set-punctuation-list! str)
    (espeak-error
     ((foreign-lambda* int ((c-string str) (size_t len))
        "wchar_t * buf = malloc(sizeof(wchar_t) * len + 1);"
        "mbstowcs(buf,str,len);"
        "int res = espeak_SetPunctuationList(buf);"
        "free(buf);"
        "C_return(res);")
      str (string-length str))
     'set-punctuation-list!))

  ;; This segfaults if no language has been set, so we set the default voice
  (define (text->phonemes input
                          #!key
                          ipa
                          tie
                          separator)
    (initialize)
    (let* ((mode (bitwise-xor (if ipa #b10 0)
                              (if tie #b10000000 0)
                              (arithmetic-shift (char->integer (or separator #\null)) 8))))
      (when (not (voice-language (get-current-voice)))
        (set-voice-by-properties! (get-current-voice)))
      ((foreign-lambda* c-string (((const c-string) input) (int mode))
         "const void *ptr = input;
          C_return(espeak_TextToPhonemes(&ptr, espeakCHARS_AUTO, mode));")
       input mode)))

  (define-external (make_voice_list (c-pointer lst)) scheme-object
    (let loop ((i 0))
      (if ((foreign-lambda* bool (((c-pointer (c-pointer "espeak_VOICE")) lst) (int i))
             "C_return(lst[i] == NULL);") lst i)
          '()
          (cons ((foreign-lambda* c-pointer (((c-pointer (c-pointer "espeak_VOICE")) lst) (int i))
                   "C_return(lst[i]);")
                 lst i)
                (loop (add1 i))))))

  (define (list-voices #!optional voice)
    (initialize)
    (map ptr->voice
         (if voice
             ((foreign-safe-lambda* scheme-object ((espeak-voice voice))
                "
const espeak_VOICE **voices = espeak_ListVoices(voice);
C_return(make_voice_list(voices));
") voice)
             ((foreign-safe-lambda* scheme-object ()
                "
const espeak_VOICE **voices = espeak_ListVoices(NULL);
C_return(make_voice_list(voices));
")))))

  (define (set-voice-by-file! filename)
    (initialize)
    (espeak-error
     ((foreign-lambda int "espeak_SetVoiceByFile" c-string) filename)
     'set-voice-by-file!))

  (define (set-voice-by-name! name)
    (initialize)
    (espeak-error
     ((foreign-lambda int "espeak_SetVoiceByName" c-string) name)
     'set-voice-by-name!))

  (define (set-voice-by-properties! voice)
    (initialize)
    ;; This is cause in same cases we may want to set the voice from default,
    ;; or we've just queried some voice or other and the language is return
    ;; witha preference byte, but you can't actually pass that to this
    ;; function, so we remove it if present.

    ;; If there's non-alphabetic starting voices, uh... let me know.
    (let ((language (voice-language voice)))
      (when (and language
                 (not (char-alphabetic? (string-ref language 0))))
        (set! (voice-language voice) (substring language 1))))
    (espeak-error
     ((foreign-lambda int "espeak_SetVoiceByProperties" espeak-voice) voice)
     'set-voice-by-properties!))

  (define  get-current-voice
    (foreign-lambda espeak-voice "espeak_GetCurrentVoice"))

  (define (cancel)
    (espeak-error
     ((foreign-lambda int "espeak_Cancel"))
     'cancel))

  (define playing?
     (foreign-lambda bool "espeak_IsPlaying"))

  (define (synchronize)
    (espeak-error
     ((foreign-lambda int "espeak_Synchronize"))
     'synchronize))

  (define (terminate)
    (initialize)
    (espeak-error
     ((foreign-lambda int "espeak_Terminate"))
     'terminate))

  (define (info)
    ((foreign-primitive ()
         "char *path;"
       "const char* version = espeak_Info((const char **)&path);"
       "int lenver = strlen(version);"
       "int lenpath = strlen(path);"
       "C_word *verptr = C_alloc(C_SIZEOF_STRING(lenver));"
       "C_word *pathptr = C_alloc(C_SIZEOF_STRING(lenpath));"
       "C_word av[4] = {C_SCHEME_UNDEFINED, C_k,
                        C_string(&verptr, lenver, (char *)version),
                        C_string(&pathptr, lenpath, path)};"
       "C_values(4,av);")))

  ;; Attempts to set params/properties only  within the scope of this function.
  ;; if async, you shouldn't be messing with these while the audio is playing
  (define (say text #!key
               (sync #f)
               ;; Voice properties
               (name (voice-name (get-current-voice)))
               (language (voice-language (get-current-voice)))
               (identifier (voice-identifier (get-current-voice)))
               (gender (voice-gender (get-current-voice)))
               (age (voice-age (get-current-voice)))
               (variant (voice-variant (get-current-voice)))
               ;; (voice (make-voice))
               ;; Parameters
               (rate (get-parameter param/rate))
               (volume (get-parameter param/volume))
               (pitch (get-parameter param/pitch))
               (range (get-parameter param/range))
               (punctuation (get-parameter param/punctuation))
               (capitals (get-parameter param/capitals))
               (wordgap (get-parameter param/wordgap)))
    (initialize)
    (let ((voice (make-voice #:name name
                             #:language language
                             #:identifier identifier
                             #:gender gender
                             #:age age
                             #:variant variant)))
      (set-voice-by-properties! voice)
      (set-parameter! param/rate rate)
      (set-parameter! param/volume volume)
      (set-parameter! param/pitch pitch)
      (set-parameter! param/range range)
      (set-parameter! param/punctuation punctuation)
      (set-parameter! param/capitals capitals)
      (set-parameter! param/wordgap wordgap)
      (synth text)
      (when sync (synchronize))))

  (define (reset-defaults!)
    (set-voice-by-properties! (make-voice))
    (set-parameter! param/rate (get-parameter param/rate 'default))
    (set-parameter! param/volume (get-parameter param/volume 'default))
    (set-parameter! param/pitch (get-parameter param/pitch 'default))
    (set-parameter! param/range (get-parameter param/range 'default))
    (set-parameter! param/punctuation (get-parameter param/punctuation 'default))
    (set-parameter! param/capitals (get-parameter param/capitals 'default))
    (set-parameter! param/wordgap (get-parameter param/wordgap 'default))))