(defun my-french-en-to-fr (text &optional display-only)
  "Translate TEXT from English to French with the Google Translate API.
The API key is read from the GOOGLE_API_KEY environment variable.
Interactively, insert the translation at point; with a prefix
argument (DISPLAY-ONLY), show it in the echo area instead.
Return the translated string."
  (interactive (list (read-string "Text: ") current-prefix-arg))
  (let ((key (getenv "GOOGLE_API_KEY")))
    ;; Fail early with a clear message instead of letting
    ;; `url-hexify-string' signal a wrong-type error on nil below.
    (unless key
      (user-error "Set the GOOGLE_API_KEY environment variable first"))
    (let* ((url "https://translation.googleapis.com/language/translate/v2")
           (params `(("key" . ,key)
                     ("q" . ,text)
                     ("source" . "en")
                     ("target" . "fr")
                     ("format" . "text")))
           ;; Percent-encode each key/value pair into a query string.
           (query-string (mapconcat
                          (lambda (pair)
                            (format "%s=%s"
                                    (url-hexify-string (car pair))
                                    (url-hexify-string (cdr pair))))
                          params
                          "&"))
           (full-url (concat url "?" query-string)))
      (let* ((response (plz 'get full-url :as #'json-read))
             (data (alist-get 'data response))
             (translations (alist-get 'translations data))
             (first-translation (car translations))
             (translated-text (alist-get 'translatedText first-translation)))
        (when (called-interactively-p 'any)
          (if display-only
              (message "%s" translated-text)
            (insert translated-text)))
        translated-text))))
I think it would be even nicer if I could use speech synthesis, so I can keep it a little more separate from my typing thoughts. I want to be able to say "Okay, translate …" or "Okay, … in French" to get a translation. I've been using my fork of natrys/whisper.el for speech recognition in English, and I like it a lot. By adding a function to whisper-after-transcription-hook, I can modify the intermediate results before they're inserted into the buffer.
(defun my-whisper-translate ()
  "Replace a spoken translation request in the current buffer.
Meant for `whisper-after-transcription-hook'.  Looks for
\"okay, translate …\" or \"okay, … in French\" (case ignored) and
replaces the whole match with the French translation.  The original
English text is kept in the `help-echo' property and the translation
in the `type-hint' property so later pipeline steps can offer it as
a typing hint."
  (goto-char (point-min))
  (let ((case-fold-search t))
    (when (re-search-forward "okay[,\\.]? translate[,\\.]? \\(.+\\)\\|okay[,\\.]? \\(.+?\\) in French" nil t)
      (let* ((s (or (match-string 1) (match-string 2)))
             ;; Protect the match data from whatever the translation
             ;; request does internally.
             (translation (save-match-data (my-french-en-to-fr s))))
        ;; LITERAL is non-nil so backslashes in the translation are
        ;; not interpreted as `replace-match' backreferences.
        (replace-match
         (propertize translation
                     'type-hint translation
                     'help-echo s)
         nil t)))))
;; Run late (depth 70) so earlier cleanup hooks have already tidied
;; the transcription before we look for translation commands.
(with-eval-after-load 'whisper
  (add-hook 'whisper-after-transcription-hook 'my-whisper-translate 70))
But that's too easy. I want to actually type things myself so that I get more practice. Something like an autocomplete suggestion would be handy as a way of showing me a hint at the cursor. The usual completion-at-point functions are too eager to insert things if there's only one candidate, so we'll just fake it with an overlay. This code works only with my whisper.el fork because it supports using a list of functions for whisper-insert-text-at-point.
(defun my-whisper-maybe-type-with-hints (text)
  "Add this function to `whisper-insert-text-at-point'.
When TEXT carries a `type-hint' text property, start a typing
practice session for that hint and return nil so nothing is
inserted; otherwise pass TEXT through unchanged."
  (let ((hint (and text (org-find-text-property-in-string 'type-hint text))))
    (cond
     ((null hint) text)
     (t
      (my-type-with-hint hint)
      nil))))
(defvar-local my-practice-overlay nil
  "Overlay that displays the remaining text to type as an at-point hint.")
(defvar-local my-practice-target nil
  "The full string currently being practiced.")
(defvar-local my-practice-start nil
  "Buffer position where the current practice session started.")
(defun my-practice-cleanup ()
  "Remove the practice overlay and stop monitoring typing.
Deletes the hint overlay, clears the session state, and removes
`my-practice-monitor' from the buffer-local `post-command-hook'."
  (when (overlayp my-practice-overlay)
    (delete-overlay my-practice-overlay))
  (setq my-practice-overlay nil)
  (setq my-practice-target nil)
  (setq my-practice-start nil)
  (remove-hook 'post-command-hook #'my-practice-monitor t))
(defun my-practice-monitor ()
  "Update the typing hint after each command, or end the session.
Run from a buffer-local `post-command-hook'.  The session ends when
point leaves the practice region, a newline or tab is typed, or the
target has been typed completely.  Otherwise the overlay shows the
remaining text in `shadow' face, or the whole tail in `error' face
after a typo."
  (let* ((pos (point))
         (input (buffer-substring-no-properties my-practice-start pos))
         (input-len (length input))
         (target-len (length my-practice-target)))
    (cond
     ;; Finished, aborted, or point wandered outside the region.
     ((or (< pos my-practice-start)
          (> pos (+ my-practice-start target-len))
          (string-match "[\n\t]" input)
          (string= input my-practice-target))
      (my-practice-cleanup))
     ;; Still on track: show the rest, dimmed.  `string-prefix-p'
     ;; takes an IGNORE-CASE argument, so there is no need to
     ;; downcase both strings ourselves.
     ((string-prefix-p input my-practice-target t)
      (let ((remaining (substring my-practice-target input-len)))
        (move-overlay my-practice-overlay pos pos)
        (overlay-put my-practice-overlay 'after-string
                     (propertize remaining 'face 'shadow))))
     (t ; typo: show the tail in the error face
      (move-overlay my-practice-overlay pos pos)
      (overlay-put my-practice-overlay 'after-string
                   (propertize (substring my-practice-target input-len)
                               'face 'error))))))
(defun my-type-with-hint (string)
  "Show hints for STRING.
Starts a practice session at point: STRING is displayed as a dimmed
overlay, and `my-practice-monitor' tracks typing via a buffer-local
`post-command-hook'."
  (interactive "sString to practice: ")
  (my-practice-cleanup)
  (let ((here (point)))
    (setq-local my-practice-target string)
    (setq-local my-practice-start here)
    (setq-local my-practice-overlay (make-overlay here here nil t t)))
  (overlay-put my-practice-overlay 'after-string
               (propertize string 'face 'shadow))
  (add-hook 'post-command-hook #'my-practice-monitor nil t))
Here's a demonstration of me saying "Okay, this is a test, in French.":
Since we're faking in-buffer completion here, maybe we can still get away with considering this as an entry for Emacs Carnival February 2026: Completion ? =)
You can e-mail me at [email protected].
TODO: In my-whisper-run, use seq-reduce to go through the functions. I want to get my thoughts into the computer quickly, and talking might be a good way to do some of that. OpenAI Whisper is reasonably good at recognizing my speech now and whisper.el gives me a convenient way to call whisper.cpp from Emacs with a single keybinding. (Note: This is not the same whisper package as the one on MELPA.) Here is how I have it set up for reasonable performance on my Lenovo P52 with just the CPU, no GPU.
I've bound <f9> to the command whisper-run. I press <f9> to start recording, talk, and then press <f9> to stop recording. By default, it inserts the text into the buffer at the current point. I've set whisper-return-cursor-to-start to nil so that I can keep going.
;; Speech-to-text with whisper.cpp via natrys/whisper.el (not the
;; MELPA `whisper' package).  <f9> toggles recording; the text is
;; inserted at point.
(use-package whisper
  :vc (:url "https://github.com/natrys/whisper.el")
  :load-path "~/vendor/whisper.el"
  :config
  (setq whisper--mode-line-recording-indicator "⏺")
  (setq whisper-quantize "q4_0")
  (setq whisper-install-directory "~/vendor")
  (setq whisper--install-path (concat
                               (expand-file-name (file-name-as-directory whisper-install-directory))
                               "whisper.cpp/"))
  ;; Get it running with whisper-server-mode set to nil first before you switch to 'local.
  ;; If you change models, reinstall/redownload with:
  ;; (whisper-install-whispercpp (whisper--check-install-and-run nil "whisper-start"))
  (setq whisper-server-mode 'local)
  (setq whisper-model "base")
  ;; Leave point after the inserted text so dictation can continue.
  (setq whisper-return-cursor-to-start nil)
  ;(setq whisper--ffmpeg-input-device "alsa_input.usb-Blue_Microphones_Yeti_Stereo_Microphone_REV8-00.analog-stereo")
  (setq whisper--ffmpeg-input-device "VirtualMicSink.monitor")
  (setq whisper-language "en")
  (setq whisper-recording-timeout 3000)
  (setq whisper-before-transcription-hook nil)
  ;; Leave one core free for the rest of the system.
  (setq whisper-use-threads (1- (num-processors)))
  (setq whisper-transcription-buffer-name-function 'whisper--simple-transcription-buffer-name)
  ;; Depth -100: fix common mis-recognitions before any other hook runs.
  (add-hook 'whisper-after-transcription-hook 'my-subed-fix-common-errors-from-start -100)
  :bind
  (("<f9>" . whisper-run)
   ("C-<f9>" . my-whisper-run)
   ("S-<f9>" . my-whisper-replay)
   ("M-<f9>" . my-whisper-toggle-language)))
Let's see if we can process "Computer remind me to…":
(defvar my-whisper-org-reminder-template "t"
  "Org capture template key used for spoken reminders.")

(defun my-whisper-org-process-reminder ()
  "Capture \"computer, remind me to …\" from the transcription buffer.
Meant for `whisper-after-transcription-hook'.  When the phrase is
found, capture the rest of the sentence with `org-capture' using
`my-whisper-org-reminder-template', then erase the transcription
buffer so nothing is inserted at point."
  (let ((text (buffer-string))
        reminder)
    ;; "reminds?" and optional punctuation absorb speech-recognition
    ;; variation; `string-match' honors `case-fold-search' for the
    ;; capitalized first word.
    (when (string-match "computer[,\.]? reminds? me to \\(.+\\)" text)
      (setq reminder (match-string 1 text))
      (save-window-excursion
        ;; Capture from where recording started so the template can
        ;; pick up that context.
        (with-current-buffer (if (markerp whisper--marker) (marker-buffer whisper--marker) (current-buffer))
          (when (markerp whisper--marker) (goto-char whisper--marker))
          (org-capture nil my-whisper-org-reminder-template)
          (insert reminder)
          (org-capture-finalize)))
      ;; Erase the transcription so the raw phrase is not inserted.
      (erase-buffer))))

(with-eval-after-load 'whisper
  (add-hook 'whisper-after-transcription-hook 'my-whisper-org-process-reminder 50))
Disk space is inexpensive and backups are great, so let's save each file using the timestamp.
(defvar my-whisper-dir "~/recordings/whisper/"
  "Directory where Whisper recordings and transcripts are kept.")

(defun my-whisper-set-temp-filename ()
  "Store each recording under `my-whisper-dir', named by timestamp.
Meant for `whisper-before-transcription-hook'.  Creates the
directory first so recording doesn't fail on a fresh machine."
  (make-directory my-whisper-dir t)
  (setq whisper--temp-file (expand-file-name
                            (format-time-string "%Y-%m-%d-%H-%M-%S.wav")
                            my-whisper-dir)))

(with-eval-after-load 'whisper
  (add-hook 'whisper-before-transcription-hook #'my-whisper-set-temp-filename))
The technology isn't quite there yet to do real-time audio transcription so that I can see what it understands while I'm saying things, but that might be distracting anyway. If I do it in short segments, it might still be okay. I can replay the most recently recorded snippet in case it's missed something and I've forgotten what I just said.
(defun my-whisper-replay (&optional file)
  "Replay the last temporary recording.
Interactively, a prefix argument prompts for FILE; when FILE is
given it becomes the new `whisper--temp-file'."
  (interactive (list
                (when current-prefix-arg
                  (read-file-name "File: " my-whisper-dir))))
  (when file
    (setq whisper--temp-file file))
  (mpv-play whisper--temp-file))
(defun my-whisper-insert-retry (&optional file)
  "Transcribe the last recording (or FILE) again, inserting at point.
Interactively, a prefix argument prompts for the file to retry."
  (interactive (list
                (when current-prefix-arg
                  (read-file-name "File: " my-whisper-dir))))
  ;; Drop any previous transcription state before re-running.
  (whisper--cleanup-transcription)
  (setq whisper--marker (point-marker)
        whisper--temp-file (or file whisper--temp-file))
  (whisper--transcribe-audio))
Il peut aussi comprendre le français.
(defun my-whisper-toggle-language ()
  "Set the language explicitly, since sometimes auto doesn't figure out the right one.
Toggles `whisper-language' between \"en\" and \"fr\" and kills the
local whisper server process (if one is live) so the new language
takes effect on the next run."
  (interactive)
  (setq whisper-language (if (string= whisper-language "en") "fr" "en"))
  ;; If using a server, we need to restart for the language.  Guard
  ;; with `bound-and-true-p' so this also works before the server
  ;; variable has ever been defined.
  (when (process-live-p (bound-and-true-p whisper--server-process))
    (kill-process whisper--server-process))
  (message "%s" whisper-language))
I could use this with org-capture, but that's a lot of keystrokes. My shortcut for org-capture is C-c r. I need to press at least one key to set the template, <f9> to start recording, <f9> to stop recording, and C-c C-c to save it. I want to be able to capture notes to my currently clocked in task without having an Org capture buffer interrupt my display.
To clock in, I can use C-c C-x i or my ! speed command. Bonus: the modeline displays the current task to keep me on track, and I can use org-clock-goto (which I've bound to C-c j) to jump to it.
Then, when I'm looking at something else and I want to record a note, I can press <f9> to start the recording, and then C-<f9> to save it to my currently clocked task along with a link to whatever I'm looking at. (Update: Ooh, now I can save a screenshot too.)
(defun my-whisper-reset (text)
  "Reset per-run state at the end of the insertion pipeline.
Clears the skip-annotation flag and removes the clocked-task saver
that `my-whisper-run' installs.  Returns TEXT unchanged."
  (setq my-whisper-skip-annotation nil)
  (remove-hook 'whisper-insert-text-at-point
               #'my-whisper-org-save-to-clocked-task)
  text)
;; Only works with my tweaks to whisper.el
;; https://github.com/sachac/whisper.el/tree/whisper-insert-text-at-point-function
;; Each function in the list receives the text and returns either a
;; (possibly modified) string for the next function, or nil to stop
;; further processing.
(with-eval-after-load 'whisper
  (setq whisper-insert-text-at-point
        '(my-whisper-handle-commands
          my-whisper-save-text
          my-whisper-save-to-file
          my-whisper-maybe-expand-snippet
          my-whisper-maybe-type
          my-whisper-maybe-type-with-hints
          my-whisper-insert
          my-whisper-reset)))
(defvar my-whisper-last-annotation nil "Last annotation so we can skip duplicates.")
(defvar my-whisper-skip-annotation nil
  "When non-nil, skip storing a link alongside the next saved note.")
(defvar my-whisper-target-markers nil "List of markers to send text to.")
(defun my-whisper-insert (text)
  "Insert TEXT at each target marker, or where recording started.
Targets come from `my-whisper-target-markers' (a marker or a list
of markers); when that is nil, `whisper--marker' — the point where
whisper was started — is used.  A leading space is added unless the
insertion point follows whitespace or starts a line, and each
marker is moved past the inserted text.  Returns nil so later
functions in `whisper-insert-text-at-point' don't insert TEXT
again."
  (let ((markers
         (cond
          ((null my-whisper-target-markers)
           (list whisper--marker)) ; current point where whisper was started
          ((listp my-whisper-target-markers)
           my-whisper-target-markers)
          ((markerp my-whisper-target-markers)
           (list my-whisper-target-markers))))
        (orig-point (point)))
    (when text
      ;; `mapc', not `mapcar': only the side effects matter.
      (mapc (lambda (marker)
              (with-current-buffer (marker-buffer marker)
                (save-restriction
                  (widen)
                  (when (markerp marker) (goto-char marker))
                  ;; Don't append inside an Org drawer line.
                  (when (and (derived-mode-p 'org-mode) (org-at-drawer-p))
                    (insert "\n"))
                  (whisper--insert-text
                   (concat
                    ;; The alternatives match at most one character, so
                    ;; limiting the backward search to one char suffices.
                    (if (looking-back "[ \t\n]\\|^" (max (1- (point)) (point-min)))
                        ""
                      " ")
                    (string-trim text)))
                  ;; Move the marker forward here
                  (move-marker marker (point)))))
            markers)
      ;; Restore point when we were writing to explicit targets.
      (when my-whisper-target-markers
        (goto-char orig-point))
      nil)))
(defun my-whisper-maybe-type (text)
  "Type TEXT with xdotool when the Emacs frame is unfocused.
When Emacs has focus, return TEXT for the next pipeline function;
otherwise send it to whatever application has focus and return nil."
  (cond
   ((null text) nil)
   ((frame-focus-state) text)
   (t
    (make-process :name "xdotool"
                  :command (list "xdotool" "type" text))
    nil)))
(defun my-whisper-clear-markers ()
  "Clear `my-whisper-target-markers' so text goes to point again."
  (interactive)
  (setq my-whisper-target-markers nil))
(defun my-whisper-use-current-point (&optional add)
  "Send future transcriptions to the current point.
With prefix argument ADD, add point to the existing list of targets
instead of replacing it."
  (interactive (list current-prefix-arg))
  (let ((marker (point-marker)))
    (if add
        (push marker my-whisper-target-markers)
      (setq my-whisper-target-markers (list marker)))))
(defun my-whisper-run-at-point (&optional add)
  "Clear any saved target markers, then start or stop recording.
NOTE(review): ADD is accepted for symmetry with the other
target-marker commands but is currently unused — confirm whether it
should add point as a target instead of clearing."
  (interactive (list current-prefix-arg))
  (my-whisper-clear-markers)
  (whisper-run))
;; <f9> clears targets first; <kp-1> keeps plain `whisper-run'.
(keymap-global-set "<f9>" #'my-whisper-run-at-point)
(keymap-global-set "<kp-1>" #'whisper-run)
(defun my-whisper-jump-to-marker ()
  "Move point to the first marker in `my-whisper-target-markers'.
Signals a `user-error' when no targets are set, instead of an
opaque wrong-type error from `marker-buffer'."
  (interactive)
  (unless my-whisper-target-markers
    (user-error "No whisper target markers set"))
  ;; NOTE(review): this moves point in the target buffer without
  ;; displaying it; `pop-to-buffer' may be the intended behavior.
  (with-current-buffer (marker-buffer (car my-whisper-target-markers))
    (goto-char (car my-whisper-target-markers))))
(defun my-whisper-use-currently-clocked-task (&optional add)
  "Send future transcriptions to the end of the clocked Org task.
With prefix argument ADD, add this target to the existing list of
targets instead of replacing it."
  (interactive (list current-prefix-arg))
  (save-window-excursion
    (save-restriction
      (save-excursion
        (org-clock-goto)
        (org-end-of-meta-data)
        (org-end-of-subtree)
        (let ((marker (point-marker)))
          (if add
              (push marker my-whisper-target-markers)
            (setq my-whisper-target-markers (list marker))))))))
(defun my-whisper-run (&optional skip-annotation)
  "Record and save the transcription to the currently clocked task.
With prefix argument SKIP-ANNOTATION, don't store a link to the
current location alongside the note."
  (interactive (list current-prefix-arg))
  (require 'whisper)
  ;; Depth -10: run before the other insertion functions so the note
  ;; is saved to the clocked task first.
  (add-hook 'whisper-insert-text-at-point #'my-whisper-org-save-to-clocked-task -10)
  (whisper-run)
  ;; The flag is cleared again by `my-whisper-reset' at the end of
  ;; the pipeline.
  (when skip-annotation
    (setq my-whisper-skip-annotation t)))
(defun my-whisper-save-text (text)
  "Save TEXT beside `whisper--temp-file'.
Writes a .txt file next to the recording, preceded by a stored Org
link when one is available.  Returns TEXT so the pipeline continues."
  (when text
    (let ((annotation (org-store-link nil))
          (txt-file (concat (file-name-sans-extension whisper--temp-file)
                            ".txt")))
      (with-temp-file txt-file
        (when annotation
          (insert annotation "\n"))
        (insert text)))
    text))
(defun my-whisper-org-save-to-clocked-task (text)
  "Save TEXT as a note under the currently clocked Org task.
For `whisper-insert-text-at-point'.  When a task is clocked in,
append TEXT to the end of its subtree, annotated with a link to
where recording started (skipping a link identical to the previous
note's), a screenshot when appropriate, and the active region as an
example block.  Without a clocked task, fall back to `org-capture'
with TEXT, restoring the window configuration shortly after."
  (when text
    (save-window-excursion
      ;; Work from the buffer/position where recording started so the
      ;; stored link and region refer to that context.
      (with-current-buffer (if (markerp whisper--marker) (marker-buffer whisper--marker) (current-buffer))
        (when (markerp whisper--marker) (goto-char whisper--marker))
        ;; Take a screenshot maybe
        (let* ((link (and (not my-whisper-skip-annotation)
                          (org-store-link nil)))
               (region (and (region-active-p) (buffer-substring (region-beginning) (region-end))))
               ;; Screenshot when there is no link, the link differs
               ;; from last time, or Emacs isn't focused.
               (screenshot-filename
                (when (or
                       (null link)
                       (not (string= my-whisper-last-annotation link))
                       (not (frame-focus-state))) ; not in focus, take a screenshot
                  (my-screenshot-current-screen (concat (file-name-sans-extension whisper--temp-file) ".png")))))
          (if (org-clocking-p)
              (save-window-excursion
                (save-restriction
                  (save-excursion
                    (org-clock-goto)
                    (org-end-of-subtree)
                    (unless (bolp)
                      (insert "\n"))
                    (insert "\n")
                    ;; Insert the link (with optional screenshot) unless
                    ;; it would repeat the previous note's link; with a
                    ;; duplicate link, still insert the screenshot alone.
                    (if (and link (not (string= my-whisper-last-annotation link)))
                        (insert
                         (if screenshot-filename
                             (concat "(" (org-link-make-string
                                          (concat "file:" screenshot-filename)
                                          "screenshot") ") ")
                           "")
                         link
                         "\n")
                      (when screenshot-filename
                        (insert (org-link-make-string
                                 (concat "file:" screenshot-filename)
                                 "screenshot")
                                "\n")))
                    (when region
                      (insert "#+begin_example\n" region "\n#+end_example\n"))
                    (insert text "\n")
                    ;; Remember the link to skip duplicates next time.
                    (setq my-whisper-last-annotation link)))
                ;; Delay the message so whisper's own messages don't
                ;; immediately clobber it.
                (run-at-time 0.5 nil (lambda (text) (message "Added clock note: %s" text)) text))
            ;; No clocked task, prompt for a place to capture it
            (kill-new text)
            (setq org-capture-initial text)
            (call-interactively 'org-capture)
            ;; Delay the window configuration
            (let ((config (current-window-configuration)))
              (run-at-time 0.5 nil
                           (lambda (text config)
                             (set-window-configuration config)
                             (message "Copied: %s" text))
                           text config))))))))
(with-eval-after-load 'org
  (add-hook 'org-clock-in-hook #'my-whisper-org-clear-saved-annotation))
(defun my-whisper-org-clear-saved-annotation ()
  "Forget the last saved annotation when clocking into a task.
Resets `my-whisper-last-annotation' — the variable actually read
and written by `my-whisper-org-save-to-clocked-task' — so the first
note under the new task gets its link.  (Previously this set a
never-read `my-whisper-org-last-annotation', making it a no-op.)"
  (setq my-whisper-last-annotation nil))
Here's an idea for a function that saves the recognized text with a timestamp.
(defvar my-whisper-notes "~/sync/stream/narration.org"
  "File where narration notes are appended.")
(defun my-whisper-save-to-file (text)
  "Append TEXT to `my-whisper-notes' with a timestamp and link.
Saves the buffer, shows a delayed confirmation message, and returns
TEXT so the rest of the pipeline keeps running."
  (when text
    (let ((link (org-store-link nil)))
      (with-current-buffer (find-file-noselect my-whisper-notes)
        (goto-char (point-max))
        (insert "\n\n"
                (format-time-string "%H:%M ")
                text
                "\n"
                (if link (concat link "\n") ""))
        (save-buffer)
        (run-at-time 0.5 nil
                     (lambda (text) (message "Saved to file: %s" text))
                     text)))
    text))
And now I can redo things if needed:
(defun my-whisper-redo ()
  "Transcribe the saved recording again, inserting at point.
Moves the whisper marker here so the new transcription lands at the
current location."
  (interactive)
  (setq whisper--marker (point-marker))
  (whisper--transcribe-audio))
I think I've just figured out my Pipewire setup so that I can record audio in OBS while also being able to do speech to text, without the audio stuttering. qpwgraph was super helpful for visualizing the Pipewire connections and fixing them.
# Rebuild the Pipewire routing: a VirtualMicSink that whisper.el and
# OBS read from, and a CombinedSink that mixes mic + system output.
systemctl --user restart pipewire
sleep 2

# Null sinks to route audio through.
pactl load-module module-null-sink \
      sink_name="VirtualMicSink" sink_properties=device.description=VirtualMicSink
pactl load-module module-null-sink \
      sink_name="CombinedSink" sink_properties=device.description=CombinedSink

# Built-in (PCI) audio, when present.
if pactl list short sources | grep -i pci-0000; then
    pactl load-module module-loopback \
          source="alsa_input.pci-0000_00_1f.3.analog-stereo" \
          sink="VirtualMicSink" \
          latency_msec=100 \
          adjust_time=1 \
          source_output_properties="node.description='SysToVMic' node.name='SysToVMic' media.name='SysVToMic'" \
          sink_input_properties="node.description='SysToVMic' node.name='SysToVMic' media.role='filter'"
    # Fixed: the source argument used to sit on the same line after the
    # backslash, which passed a malformed " source=..." argument.
    pactl load-module module-loopback \
          source="alsa_output.pci-0000_00_1f.3.analog-stereo.monitor" \
          sink="CombinedSink" \
          node_name="SystemOutToCombined" \
          source_output_properties="node.description='SysOutToCombined' node.name='SysOutToCombined'" \
          sink_input_properties="node.description='SysOutToCombined' node.name='SysOutToCombined' media.role='filter'" \
          latency_msec=100 adjust_time=1
fi

# Blue Yeti USB microphone, when present.
if pactl list short sources | grep -i yeti; then
    pactl load-module module-loopback \
          source="alsa_input.usb-Blue_Microphones_Yeti_Stereo_Microphone_REV8-00.analog-stereo" \
          sink="VirtualMicSink" \
          latency_msec=100 \
          adjust_time=1 \
          source_output_properties="node.description='YetiToVMic' node.name='YetiToVMic' media.name='YetiToVMic'" \
          sink_input_properties="node.description='YetiToVMic' node.name='YetiToVMic' media.role='filter'"
    pactl load-module module-loopback \
          source="alsa_output.usb-Blue_Microphones_Yeti_Stereo_Microphone_REV8-00.analog-stereo.monitor" \
          sink="CombinedSink" \
          source_output_properties="node.description='YetiOutToCombined' node.name='YetiOutToCombined' media.name='YetiOutToCombined' " \
          sink_input_properties="node.description='YetiOutToCombined' node.name='YetiOutToCombined' media.role='filter'" \
          latency_msec=100 adjust_time=1
fi

# Mix the virtual mic into the combined sink.
pactl load-module module-loopback \
      source="VirtualMicSink.monitor" \
      sink="CombinedSink" \
      source_output_properties="node.description='VMicToCombined' node.name='VMicToCombined' media.name='VMicToCombined'" \
      sink_input_properties="node.description='VMicToCombined' node.name='VMicToCombined' media.role='filter'" \
      latency_msec=100 adjust_time=1

# Spare sink for anything else that should end up in the mix.
pactl load-module module-null-sink \
      sink_name="ExtraSink1" sink_properties=device.description=ExtraSink1
pactl load-module module-loopback \
      source="ExtraSink1.monitor" \
      sink="CombinedSink" \
      source_output_properties="node.description='ExtraSink1ToCombined' node.name='ExtraSink1ToCombined' media.name='ExtraSink1ToCombined'" \
      sink_input_properties="node.description='ExtraSink1ToCombined' node.name='ExtraSink1ToCombined' media.role='filter'" \
      latency_msec=100 adjust_time=1
Here's a demo:
And then I define a global shortcut in KDE that runs:
/home/sacha/bin/xdotool-emacs key --clearmodifiers F9
So now I can dictate into other applications or save into Emacs. Which suggests of course that I should get it working with C-f9 as well, if I can avoid the keyboard shortcut loop…
You can comment on Mastodon or e-mail me at [email protected].
TODO: Fix timestamp format in toggle recording task.
I want to be able to use voice control to do things on my phone while I'm busy washing dishes, putting things away, knitting, or just keeping my hands warm. It'll also be handy to have a way to get things out of my head when the kiddo is koala-ing me. I've been using my Google Pixel 8's voice interface to set timers, send text messages, and do quick web searches. Building on my recent thoughts on wearable computing, I decided to spend some more time investigating the Google Assistant and Voice Access features in Android and setting up other voice shortcuts.
I switched back to Google Assistant from Gemini so that I could run Tasker routines. I also found out that I needed to switch the language from English/Canada to English/US in order for my Tasker scripts to run instead of Google Assistant treating them as web searches. Once that was sorted out, I could run Tasker tasks with "Hey Google, run {task-name} in Tasker" and parameterize them with "Hey Google, run {task-name} with {parameter} in Tasker."
Learning how to use Voice Access to navigate, click, and type on my phone was straightforward. "Scroll down" works for webpages, while "scroll right" works for the e-books I have in Libby. Tapping items by text usually works. When it doesn't, I can use "show labels", "show numbers", or "show grid." The speech-to-text of "type …" isn't as good as Whisper, so I probably won't use it for a lot of dictation, but it's fine for quick notes. I can keep recording in the background so that I have the raw audio in case I want to review it or grab the WhisperX transcripts instead.
For some reason, saying "Hey Google, voice access" to start up voice access has been leaving the Assistant dialog on the screen, which makes it difficult to interact with the screen I'm looking at. I added a Tasker routine to start voice access, wait a second, and tap on the screen to dismiss the Assistant dialog.
Start Voice.tsk.xml - Import via Taskernet
<TaskerData sr="" dvi="1" tv="6.3.13"> <Task sr="task24"> <cdate>1737565479418</cdate> <edate>1737566416661</edate> <id>24</id> <nme>Start Voice</nme> <pri>1000</pri> <Share sr="Share"> <b>false</b> <d>Start voice access and dismiss the assistant dialog</d> <g>Accessibility,AutoInput</g> <p>true</p> <t></t> </Share> <Action sr="act0" ve="7"> <code>20</code> <App sr="arg0"> <appClass>com.google.android.apps.accessibility.voiceaccess.LauncherActivity</appClass> <appPkg>com.google.android.apps.accessibility.voiceaccess</appPkg> <label>Voice Access</label> </App> <Str sr="arg1" ve="3"/> <Int sr="arg2" val="0"/> <Int sr="arg3" val="0"/> </Action> <Action sr="act1" ve="7"> <code>30</code> <Int sr="arg0" val="0"/> <Int sr="arg1" val="1"/> <Int sr="arg2" val="0"/> <Int sr="arg3" val="0"/> <Int sr="arg4" val="0"/> </Action> <Action sr="act2" ve="7"> <code>107361459</code> <Bundle sr="arg0"> <Vals sr="val"> <EnableDisableAccessibilityService><null></EnableDisableAccessibilityService> <EnableDisableAccessibilityService-type>java.lang.String</EnableDisableAccessibilityService-type> <Password><null></Password> <Password-type>java.lang.String</Password-type> <com.twofortyfouram.locale.intent.extra.BLURB>Actions To Perform: click(point,564\,1045) Not In AutoInput: true Not In Tasker: true Separator: , Check Millis: 1000</com.twofortyfouram.locale.intent.extra.BLURB> <com.twofortyfouram.locale.intent.extra.BLURB-type>java.lang.String</com.twofortyfouram.locale.intent.extra.BLURB-type> <net.dinglisch.android.tasker.JSON_ENCODED_KEYS>parameters</net.dinglisch.android.tasker.JSON_ENCODED_KEYS> <net.dinglisch.android.tasker.JSON_ENCODED_KEYS-type>java.lang.String</net.dinglisch.android.tasker.JSON_ENCODED_KEYS-type> <net.dinglisch.android.tasker.RELEVANT_VARIABLES><StringArray sr=""><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES0>%ailastbounds Last Bounds Bounds (left,top,right,bottom) of the item that the action last interacted 
with</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES0><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES1>%ailastcoordinates Last Coordinates Center coordinates (x,y) of the item that the action last interacted with</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES1><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES2>%err Error Code Only available if you select &lt;b&gt;Continue Task After Error&lt;/b&gt; and the action ends in error</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES2><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES3>%errmsg Error Message Only available if you select &lt;b&gt;Continue Task After Error&lt;/b&gt; and the action ends in error</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES3></StringArray></net.dinglisch.android.tasker.RELEVANT_VARIABLES> <net.dinglisch.android.tasker.RELEVANT_VARIABLES-type>[Ljava.lang.String;</net.dinglisch.android.tasker.RELEVANT_VARIABLES-type> <net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS>parameters plugininstanceid plugintypeid </net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS> <net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS-type>java.lang.String</net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS-type> <net.dinglisch.android.tasker.subbundled>true</net.dinglisch.android.tasker.subbundled> <net.dinglisch.android.tasker.subbundled-type>java.lang.Boolean</net.dinglisch.android.tasker.subbundled-type> <parameters>{"_action":"click(point,564\\,1045)","_additionalOptions":{"checkMs":"1000","separator":",","withCoordinates":false},"_whenToPerformAction":{"notInAutoInput":true,"notInTasker":true},"generatedValues":{}}</parameters> <parameters-type>java.lang.String</parameters-type> <plugininstanceid>b46b8afc-c840-40ad-9283-3946c57a1018</plugininstanceid> <plugininstanceid-type>java.lang.String</plugininstanceid-type> <plugintypeid>com.joaomgcd.autoinput.intent.IntentActionv2</plugintypeid> 
<plugintypeid-type>java.lang.String</plugintypeid-type> </Vals> </Bundle> <Str sr="arg1" ve="3">com.joaomgcd.autoinput</Str> <Str sr="arg2" ve="3">com.joaomgcd.autoinput.activity.ActivityConfigActionv2</Str> <Int sr="arg3" val="60"/> <Int sr="arg4" val="1"/> </Action> </Task> </TaskerData>
I can use "Hey Google, read aloud" to read a webpage. I can use "Hey Google, skip ahead 2 minutes" or "Hey Google, rewind 30 seconds." Not sure how I can navigate by text, though. It would be nice to get an overview of headings and then jump to the one I want, or search for text and continue from there.
I wanted to be able to play random emacs.tv videos without needing to touch my phone. I added autoplay support to the web interface so that you can open https://emacs.tv?autoplay=1 and have it autoplay videos when you select the next random one by clicking on the site logo, "Lucky pick", or the dice icon. The first video doesn't autoplay because YouTube requires user interaction in order to autoplay unmuted videos, but I can work around that with a Tasker script that loads the URL, waits a few seconds, and clicks on the heading with AutoInput.
Emacs TV.tsk.xml - Import via Taskernet
<TaskerData sr="" dvi="1" tv="6.3.13"> <Task sr="task18"> <cdate>1737558964554</cdate> <edate>1737562488128</edate> <id>18</id> <nme>Emacs TV</nme> <pri>1000</pri> <Share sr="Share"> <b>false</b> <d>Play random Emacs video</d> <g>Watch</g> <p>true</p> <t></t> </Share> <Action sr="act0" ve="7"> <code>104</code> <Str sr="arg0" ve="3">https://emacs.tv?autoplay=1</Str> <App sr="arg1"/> <Int sr="arg2" val="0"/> <Str sr="arg3" ve="3"/> </Action> <Action sr="act1" ve="7"> <code>30</code> <Int sr="arg0" val="0"/> <Int sr="arg1" val="3"/> <Int sr="arg2" val="0"/> <Int sr="arg3" val="0"/> <Int sr="arg4" val="0"/> </Action> <Action sr="act2" ve="7"> <code>107361459</code> <Bundle sr="arg0"> <Vals sr="val"> <EnableDisableAccessibilityService><null></EnableDisableAccessibilityService> <EnableDisableAccessibilityService-type>java.lang.String</EnableDisableAccessibilityService-type> <Password><null></Password> <Password-type>java.lang.String</Password-type> <com.twofortyfouram.locale.intent.extra.BLURB>Actions To Perform: click(point,229\,417) Not In AutoInput: true Not In Tasker: true Separator: , Check Millis: 1000</com.twofortyfouram.locale.intent.extra.BLURB> <com.twofortyfouram.locale.intent.extra.BLURB-type>java.lang.String</com.twofortyfouram.locale.intent.extra.BLURB-type> <net.dinglisch.android.tasker.JSON_ENCODED_KEYS>parameters</net.dinglisch.android.tasker.JSON_ENCODED_KEYS> <net.dinglisch.android.tasker.JSON_ENCODED_KEYS-type>java.lang.String</net.dinglisch.android.tasker.JSON_ENCODED_KEYS-type> <net.dinglisch.android.tasker.RELEVANT_VARIABLES><StringArray sr=""><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES0>%ailastbounds Last Bounds Bounds (left,top,right,bottom) of the item that the action last interacted with</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES0><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES1>%ailastcoordinates Last Coordinates Center coordinates (x,y) of the item that the action last interacted 
with</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES1><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES2>%err Error Code Only available if you select &lt;b&gt;Continue Task After Error&lt;/b&gt; and the action ends in error</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES2><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES3>%errmsg Error Message Only available if you select &lt;b&gt;Continue Task After Error&lt;/b&gt; and the action ends in error</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES3></StringArray></net.dinglisch.android.tasker.RELEVANT_VARIABLES> <net.dinglisch.android.tasker.RELEVANT_VARIABLES-type>[Ljava.lang.String;</net.dinglisch.android.tasker.RELEVANT_VARIABLES-type> <net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS>parameters plugininstanceid plugintypeid </net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS> <net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS-type>java.lang.String</net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS-type> <net.dinglisch.android.tasker.subbundled>true</net.dinglisch.android.tasker.subbundled> <net.dinglisch.android.tasker.subbundled-type>java.lang.Boolean</net.dinglisch.android.tasker.subbundled-type> <parameters>{"_action":"click(point,229\\,417)","_additionalOptions":{"checkMs":"1000","separator":",","withCoordinates":false},"_whenToPerformAction":{"notInAutoInput":true,"notInTasker":true},"generatedValues":{}}</parameters> <parameters-type>java.lang.String</parameters-type> <plugininstanceid>45ce7a83-47e5-48fb-8c3e-20655e668353</plugininstanceid> <plugininstanceid-type>java.lang.String</plugininstanceid-type> <plugintypeid>com.joaomgcd.autoinput.intent.IntentActionv2</plugintypeid> <plugintypeid-type>java.lang.String</plugintypeid-type> </Vals> </Bundle> <Str sr="arg1" ve="3">com.joaomgcd.autoinput</Str> <Str sr="arg2" ve="3">com.joaomgcd.autoinput.activity.ActivityConfigActionv2</Str> <Int sr="arg3" val="60"/> <Int sr="arg4" val="1"/> </Action> 
</Task> </TaskerData>
Then I set up a Google Assistant routine with the triggers "teach me" or "Emacs TV" and the action "run Emacs TV in Tasker." Now I can say "Hey Google, teach me" and it'll play a random Emacs video for me. I can repeat "Hey Google, teach me" to get a different video, and I can pause with "Hey Google, pause video".
This was actually my second approach. The first
time I tried to implement this, I thought about
using Voice Access to interact with the buttons.
Strangely, I couldn't get Voice Access to click on
the header links or the buttons even when I had
aria-label, role="button", and tabindex
attributes set on them. As a hacky workaround, I
made the site logo pick a new random video when
clicked, so I can at least use it as a large touch
target when I use "display grid" in Voice Access.
("Tap 5" will load the next video.)
There doesn't seem to be a way to add custom voice access commands to a webpage in a way that hooks into Android Voice Access and iOS Voice Control, but maybe I'm just missing something obvious when it comes to ARIA attributes.
There were some words that I couldn't get Google Assistant or Voice Access to understand, like "open Orgzly Revived". Fortunately, "Open Revived" worked just fine.
I wanted to be able to see my Org Agenda. After some fiddling around (see the resources in this section), I figured out this AutoShare intent that runs an agenda search:
{
"target": "Activity",
"appname": "Orgzly Revived",
"action": "android.intent.action.MAIN",
"package": "com.orgzlyrevived",
"class": "com.orgzly.android.ui.main.MainActivity",
"extras": [
{
"type": "String",
"key": "com.orgzly.intent.extra.QUERY_STRING",
"name": "Query"
}
],
"name": "Search",
"id": "Orgzly-search"
}
Then I defined a Tasker task called "Search Orgzly Revived":
Download Search Orgzly Revived.tsk.xml
<TaskerData sr="" dvi="1" tv="6.3.13"> <Task sr="task16"> <cdate>1676823952566</cdate> <edate>1737567565538</edate> <id>16</id> <nme>Search Orgzly Revived</nme> <pri>100</pri> <Share sr="Share"> <b>false</b> <d>Search Orgzly Revived</d> <g>Work,Well-Being</g> <p>false</p> <t></t> </Share> <Action sr="act0" ve="7"> <code>18</code> <App sr="arg0"> <appClass>com.orgzly.android.ui.LauncherActivity</appClass> <appPkg>com.orgzlyrevived</appPkg> <label>Orgzly Revived</label> </App> <Int sr="arg1" val="0"/> </Action> <Action sr="act1" ve="7"> <code>547</code> <Str sr="arg0" ve="3">%extra</Str> <Str sr="arg1" ve="3">com.orgzly.intent.extra.QUERY_STRING:%par1</Str> <Int sr="arg2" val="0"/> <Int sr="arg3" val="0"/> <Int sr="arg4" val="0"/> <Int sr="arg5" val="3"/> <Int sr="arg6" val="1"/> </Action> <Action sr="act2" ve="7"> <code>877</code> <Str sr="arg0" ve="3">android.intent.action.MAIN</Str> <Int sr="arg1" val="0"/> <Str sr="arg2" ve="3"/> <Str sr="arg3" ve="3"/> <Str sr="arg4" ve="3">%extra</Str> <Str sr="arg5" ve="3"/> <Str sr="arg6" ve="3"/> <Str sr="arg7" ve="3">com.orgzlyrevived</Str> <Str sr="arg8" ve="3">com.orgzly.android.ui.main.MainActivity</Str> <Int sr="arg9" val="1"/> </Action> <Img sr="icn" ve="2"> <nme>mw_action_today</nme> </Img> </Task> </TaskerData>
I made a Google Assistant routine that uses "show my agenda" as the trigger and "run search orgzly revived in Tasker" as the action. After a quick "Hey Google, show my agenda; Hey Google, voice access", I can use "scroll down" to page through the list. "Back" gets me to the list of notebooks, and "inbox" opens my inbox.
Resources:
When I'm looking at an Orgzly Revived notebook with Voice Access turned on, "plus" starts a new note. Anything that isn't a label gets typed, so I can just start saying the title of my note (or use "type …"). If I want to add the content, I have to use "hide keyboard", "tap content", and then "type …". "Tap scheduled time; Tomorrow" works if the scheduled time widget is visible, so I just need to use "scroll down" if the title is long. "Tap done; one" saves it.
Adding a note could be simpler - maybe a Tasker task that prompts me for text and adds it. I could use Tasker to prepend to my Inbox.org and then reload it in Orgzly. It would be more elegant to figure out the intent for adding a note, though. Maybe in the Orgzly Android intent receiver documentation?
When I'm looking at the Orgzly notebook and I say part of the text in a note without a link, it opens the note. If the note has a link, it seems to open the link directly. Tapping by numbers also goes to the link, but tapping by grid opens the note.
I'd love to speech-enable this someday so that I can hear Orgzly Revived step through my agenda and use my voice to mark things as cancelled/done, schedule them for today/tomorrow/next week, or add extra notes to the body.
W+ and I use the OurGroceries app. As it turns out, "Hey Google, ask OurGroceries to add milk" still works. Also, Voice Access works fine with OurGroceries. I can say "Plus", dictate an item, and tap "Add." I configured the cross-off action to be swipes instead of taps to minimize accidental crossing-off at the store, so I can say "swipe right on apples" to mark that as done.
I added a Tasker task to update my personal time-tracking system, and I added some Google Assistant routines for common categories like writing or routines. I can also use "run track with {category} in Tasker" to track a less-common category. The kiddo likes to get picked up and hugged a lot, so I added a "Hey Google, koala time" routine to clock into childcare in a more fun way. I have to enunciate that one clearly or it'll get turned into "Call into …", which doesn't work.
Since I was tinkering around with Tasker a lot, I
decided to try moving my voice recording into it.
I want to save timestamped recordings into my
~/sync/recordings directory so that they're
automatically synchronized with Syncthing, and
then they can feed into my WhisperX workflow. This
feels a little more responsive and reliable than
Fossify Voice Recorder, actually, since that one
tended to become unresponsive from time to time.
Download Toggle Recording.tsk.xml - Import via Taskernet
<TaskerData sr="" dvi="1" tv="6.3.13"> <Task sr="task12"> <cdate>1737504717303</cdate> <edate>1738272248919</edate> <id>12</id> <nme>Toggle Recording</nme> <pri>100</pri> <Share sr="Share"> <b>false</b> <d>Toggle recording on and off; save timestamped file to sync/recordings</d> <g>Sound</g> <p>true</p> <t></t> </Share> <Action sr="act0" ve="7"> <code>37</code> <ConditionList sr="if"> <Condition sr="c0" ve="3"> <lhs>%RECORDING</lhs> <op>12</op> <rhs></rhs> </Condition> </ConditionList> </Action> <Action sr="act1" ve="7"> <code>549</code> <Str sr="arg0" ve="3">%RECORDING</Str> <Int sr="arg1" val="0"/> <Int sr="arg2" val="0"/> <Int sr="arg3" val="0"/> </Action> <Action sr="act10" ve="7"> <code>166160670</code> <Bundle sr="arg0"> <Vals sr="val"> <ActionIconString1><null></ActionIconString1> <ActionIconString1-type>java.lang.String</ActionIconString1-type> <ActionIconString2><null></ActionIconString2> <ActionIconString2-type>java.lang.String</ActionIconString2-type> <ActionIconString3><null></ActionIconString3> <ActionIconString3-type>java.lang.String</ActionIconString3-type> <ActionIconString4><null></ActionIconString4> <ActionIconString4-type>java.lang.String</ActionIconString4-type> <ActionIconString5><null></ActionIconString5> <ActionIconString5-type>java.lang.String</ActionIconString5-type> <AppendTexts>false</AppendTexts> <AppendTexts-type>java.lang.Boolean</AppendTexts-type> <BackgroundColor><null></BackgroundColor> <BackgroundColor-type>java.lang.String</BackgroundColor-type> <BadgeType><null></BadgeType> <BadgeType-type>java.lang.String</BadgeType-type> <Button1UnlockScreen>false</Button1UnlockScreen> <Button1UnlockScreen-type>java.lang.Boolean</Button1UnlockScreen-type> <Button2UnlockScreen>false</Button2UnlockScreen> <Button2UnlockScreen-type>java.lang.Boolean</Button2UnlockScreen-type> <Button3UnlockScreen>false</Button3UnlockScreen> <Button3UnlockScreen-type>java.lang.Boolean</Button3UnlockScreen-type> <Button4UnlockScreen>false</Button4UnlockScreen> 
<Button4UnlockScreen-type>java.lang.Boolean</Button4UnlockScreen-type> <Button5UnlockScreen>false</Button5UnlockScreen> <Button5UnlockScreen-type>java.lang.Boolean</Button5UnlockScreen-type> <ChronometerCountDown>false</ChronometerCountDown> <ChronometerCountDown-type>java.lang.Boolean</ChronometerCountDown-type> <Colorize>false</Colorize> <Colorize-type>java.lang.Boolean</Colorize-type> <DismissOnTouchVariable><null></DismissOnTouchVariable> <DismissOnTouchVariable-type>java.lang.String</DismissOnTouchVariable-type> <ExtraInfo><null></ExtraInfo> <ExtraInfo-type>java.lang.String</ExtraInfo-type> <GroupAlertBehaviour><null></GroupAlertBehaviour> <GroupAlertBehaviour-type>java.lang.String</GroupAlertBehaviour-type> <GroupKey><null></GroupKey> <GroupKey-type>java.lang.String</GroupKey-type> <IconExpanded><null></IconExpanded> <IconExpanded-type>java.lang.String</IconExpanded-type> <IsGroupSummary>false</IsGroupSummary> <IsGroupSummary-type>java.lang.Boolean</IsGroupSummary-type> <IsGroupVariable><null></IsGroupVariable> <IsGroupVariable-type>java.lang.String</IsGroupVariable-type> <MediaAlbum><null></MediaAlbum> <MediaAlbum-type>java.lang.String</MediaAlbum-type> <MediaArtist><null></MediaArtist> <MediaArtist-type>java.lang.String</MediaArtist-type> <MediaDuration><null></MediaDuration> <MediaDuration-type>java.lang.String</MediaDuration-type> <MediaIcon><null></MediaIcon> <MediaIcon-type>java.lang.String</MediaIcon-type> <MediaLayout>false</MediaLayout> <MediaLayout-type>java.lang.Boolean</MediaLayout-type> <MediaNextCommand><null></MediaNextCommand> <MediaNextCommand-type>java.lang.String</MediaNextCommand-type> <MediaPauseCommand><null></MediaPauseCommand> <MediaPauseCommand-type>java.lang.String</MediaPauseCommand-type> <MediaPlayCommand><null></MediaPlayCommand> <MediaPlayCommand-type>java.lang.String</MediaPlayCommand-type> <MediaPlaybackState><null></MediaPlaybackState> <MediaPlaybackState-type>java.lang.String</MediaPlaybackState-type> 
<MediaPosition><null></MediaPosition> <MediaPosition-type>java.lang.String</MediaPosition-type> <MediaPreviousCommand><null></MediaPreviousCommand> <MediaPreviousCommand-type>java.lang.String</MediaPreviousCommand-type> <MediaTrack><null></MediaTrack> <MediaTrack-type>java.lang.String</MediaTrack-type> <MessagingImages><null></MessagingImages> <MessagingImages-type>java.lang.String</MessagingImages-type> <MessagingOwnIcon><null></MessagingOwnIcon> <MessagingOwnIcon-type>java.lang.String</MessagingOwnIcon-type> <MessagingOwnName><null></MessagingOwnName> <MessagingOwnName-type>java.lang.String</MessagingOwnName-type> <MessagingPersonBot><null></MessagingPersonBot> <MessagingPersonBot-type>java.lang.String</MessagingPersonBot-type> <MessagingPersonIcons><null></MessagingPersonIcons> <MessagingPersonIcons-type>java.lang.String</MessagingPersonIcons-type> <MessagingPersonImportant><null></MessagingPersonImportant> <MessagingPersonImportant-type>java.lang.String</MessagingPersonImportant-type> <MessagingPersonNames><null></MessagingPersonNames> <MessagingPersonNames-type>java.lang.String</MessagingPersonNames-type> <MessagingPersonUri><null></MessagingPersonUri> <MessagingPersonUri-type>java.lang.String</MessagingPersonUri-type> <MessagingSeparator><null></MessagingSeparator> <MessagingSeparator-type>java.lang.String</MessagingSeparator-type> <MessagingTexts><null></MessagingTexts> <MessagingTexts-type>java.lang.String</MessagingTexts-type> <NotificationChannelBypassDnd>false</NotificationChannelBypassDnd> <NotificationChannelBypassDnd-type>java.lang.Boolean</NotificationChannelBypassDnd-type> <NotificationChannelDescription><null></NotificationChannelDescription> <NotificationChannelDescription-type>java.lang.String</NotificationChannelDescription-type> <NotificationChannelId><null></NotificationChannelId> <NotificationChannelId-type>java.lang.String</NotificationChannelId-type> <NotificationChannelImportance><null></NotificationChannelImportance> 
<NotificationChannelImportance-type>java.lang.String</NotificationChannelImportance-type> <NotificationChannelName><null></NotificationChannelName> <NotificationChannelName-type>java.lang.String</NotificationChannelName-type> <NotificationChannelShowBadge>false</NotificationChannelShowBadge> <NotificationChannelShowBadge-type>java.lang.Boolean</NotificationChannelShowBadge-type> <PersistentVariable><null></PersistentVariable> <PersistentVariable-type>java.lang.String</PersistentVariable-type> <PhoneOnly>false</PhoneOnly> <PhoneOnly-type>java.lang.Boolean</PhoneOnly-type> <PriorityVariable><null></PriorityVariable> <PriorityVariable-type>java.lang.String</PriorityVariable-type> <PublicVersion><null></PublicVersion> <PublicVersion-type>java.lang.String</PublicVersion-type> <ReplyAction><null></ReplyAction> <ReplyAction-type>java.lang.String</ReplyAction-type> <ReplyChoices><null></ReplyChoices> <ReplyChoices-type>java.lang.String</ReplyChoices-type> <ReplyLabel><null></ReplyLabel> <ReplyLabel-type>java.lang.String</ReplyLabel-type> <ShareButtonsVariable><null></ShareButtonsVariable> <ShareButtonsVariable-type>java.lang.String</ShareButtonsVariable-type> <SkipPictureCache>false</SkipPictureCache> <SkipPictureCache-type>java.lang.Boolean</SkipPictureCache-type> <SoundPath><null></SoundPath> <SoundPath-type>java.lang.String</SoundPath-type> <StatusBarIconString><null></StatusBarIconString> <StatusBarIconString-type>java.lang.String</StatusBarIconString-type> <StatusBarTextSize>16</StatusBarTextSize> <StatusBarTextSize-type>java.lang.String</StatusBarTextSize-type> <TextExpanded><null></TextExpanded> <TextExpanded-type>java.lang.String</TextExpanded-type> <Time><null></Time> <Time-type>java.lang.String</Time-type> <TimeFormat><null></TimeFormat> <TimeFormat-type>java.lang.String</TimeFormat-type> <Timeout><null></Timeout> <Timeout-type>java.lang.String</Timeout-type> <TitleExpanded><null></TitleExpanded> <TitleExpanded-type>java.lang.String</TitleExpanded-type> 
<UpdateNotification>false</UpdateNotification> <UpdateNotification-type>java.lang.Boolean</UpdateNotification-type> <UseChronometer>false</UseChronometer> <UseChronometer-type>java.lang.Boolean</UseChronometer-type> <UseHTML>false</UseHTML> <UseHTML-type>java.lang.Boolean</UseHTML-type> <Visibility><null></Visibility> <Visibility-type>java.lang.String</Visibility-type> <com.twofortyfouram.locale.intent.extra.BLURB>Title: my recording Action on Touch: stop recording Status Bar Text Size: 16 Id: my-recording Dismiss on Touch: true Priority: -1 Separator: ,</com.twofortyfouram.locale.intent.extra.BLURB> <com.twofortyfouram.locale.intent.extra.BLURB-type>java.lang.String</com.twofortyfouram.locale.intent.extra.BLURB-type> <config_action_1_icon><null></config_action_1_icon> <config_action_1_icon-type>java.lang.String</config_action_1_icon-type> <config_action_2_icon><null></config_action_2_icon> <config_action_2_icon-type>java.lang.String</config_action_2_icon-type> <config_action_3_icon><null></config_action_3_icon> <config_action_3_icon-type>java.lang.String</config_action_3_icon-type> <config_action_4_icon><null></config_action_4_icon> <config_action_4_icon-type>java.lang.String</config_action_4_icon-type> <config_action_5_icon><null></config_action_5_icon> <config_action_5_icon-type>java.lang.String</config_action_5_icon-type> <config_notification_action>stop recording</config_notification_action> <config_notification_action-type>java.lang.String</config_notification_action-type> <config_notification_action_button1><null></config_notification_action_button1> <config_notification_action_button1-type>java.lang.String</config_notification_action_button1-type> <config_notification_action_button2><null></config_notification_action_button2> <config_notification_action_button2-type>java.lang.String</config_notification_action_button2-type> <config_notification_action_button3><null></config_notification_action_button3> 
<config_notification_action_button3-type>java.lang.String</config_notification_action_button3-type> <config_notification_action_button4><null></config_notification_action_button4> <config_notification_action_button4-type>java.lang.String</config_notification_action_button4-type> <config_notification_action_button5><null></config_notification_action_button5> <config_notification_action_button5-type>java.lang.String</config_notification_action_button5-type> <config_notification_action_label1><null></config_notification_action_label1> <config_notification_action_label1-type>java.lang.String</config_notification_action_label1-type> <config_notification_action_label2><null></config_notification_action_label2> <config_notification_action_label2-type>java.lang.String</config_notification_action_label2-type> <config_notification_action_label3><null></config_notification_action_label3> <config_notification_action_label3-type>java.lang.String</config_notification_action_label3-type> <config_notification_action_on_dismiss><null></config_notification_action_on_dismiss> <config_notification_action_on_dismiss-type>java.lang.String</config_notification_action_on_dismiss-type> <config_notification_action_share>false</config_notification_action_share> <config_notification_action_share-type>java.lang.Boolean</config_notification_action_share-type> <config_notification_command><null></config_notification_command> <config_notification_command-type>java.lang.String</config_notification_command-type> <config_notification_content_info><null></config_notification_content_info> <config_notification_content_info-type>java.lang.String</config_notification_content_info-type> <config_notification_dismiss_on_touch>true</config_notification_dismiss_on_touch> <config_notification_dismiss_on_touch-type>java.lang.Boolean</config_notification_dismiss_on_touch-type> <config_notification_icon><null></config_notification_icon> 
<config_notification_icon-type>java.lang.String</config_notification_icon-type> <config_notification_indeterminate_progress>false</config_notification_indeterminate_progress> <config_notification_indeterminate_progress-type>java.lang.Boolean</config_notification_indeterminate_progress-type> <config_notification_led_color><null></config_notification_led_color> <config_notification_led_color-type>java.lang.String</config_notification_led_color-type> <config_notification_led_off><null></config_notification_led_off> <config_notification_led_off-type>java.lang.String</config_notification_led_off-type> <config_notification_led_on><null></config_notification_led_on> <config_notification_led_on-type>java.lang.String</config_notification_led_on-type> <config_notification_max_progress><null></config_notification_max_progress> <config_notification_max_progress-type>java.lang.String</config_notification_max_progress-type> <config_notification_number><null></config_notification_number> <config_notification_number-type>java.lang.String</config_notification_number-type> <config_notification_persistent>true</config_notification_persistent> <config_notification_persistent-type>java.lang.Boolean</config_notification_persistent-type> <config_notification_picture><null></config_notification_picture> <config_notification_picture-type>java.lang.String</config_notification_picture-type> <config_notification_priority>-1</config_notification_priority> <config_notification_priority-type>java.lang.String</config_notification_priority-type> <config_notification_progress><null></config_notification_progress> <config_notification_progress-type>java.lang.String</config_notification_progress-type> <config_notification_subtext><null></config_notification_subtext> <config_notification_subtext-type>java.lang.String</config_notification_subtext-type> <config_notification_text><null></config_notification_text> <config_notification_text-type>java.lang.String</config_notification_text-type> 
<config_notification_ticker><null></config_notification_ticker> <config_notification_ticker-type>java.lang.String</config_notification_ticker-type> <config_notification_title>my recording</config_notification_title> <config_notification_title-type>java.lang.String</config_notification_title-type> <config_notification_url><null></config_notification_url> <config_notification_url-type>java.lang.String</config_notification_url-type> <config_notification_vibration><null></config_notification_vibration> <config_notification_vibration-type>java.lang.String</config_notification_vibration-type> <config_status_bar_icon><null></config_status_bar_icon> <config_status_bar_icon-type>java.lang.String</config_status_bar_icon-type> <net.dinglisch.android.tasker.RELEVANT_VARIABLES><StringArray sr=""><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES0>%err Error Code Only available if you select &lt;b&gt;Continue Task After Error&lt;/b&gt; and the action ends in error</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES0><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES1>%errmsg Error Message Only available if you select &lt;b&gt;Continue Task After Error&lt;/b&gt; and the action ends in error</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES1></StringArray></net.dinglisch.android.tasker.RELEVANT_VARIABLES> <net.dinglisch.android.tasker.RELEVANT_VARIABLES-type>[Ljava.lang.String;</net.dinglisch.android.tasker.RELEVANT_VARIABLES-type> <net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS>StatusBarTextSize config_notification_title config_notification_action notificaitionid config_notification_priority plugininstanceid plugintypeid </net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS> <net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS-type>java.lang.String</net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS-type> <net.dinglisch.android.tasker.subbundled>true</net.dinglisch.android.tasker.subbundled> 
<net.dinglisch.android.tasker.subbundled-type>java.lang.Boolean</net.dinglisch.android.tasker.subbundled-type> <notificaitionid>my-recording</notificaitionid> <notificaitionid-type>java.lang.String</notificaitionid-type> <notificaitionsound><null></notificaitionsound> <notificaitionsound-type>java.lang.String</notificaitionsound-type> <plugininstanceid>9fca7d3a-cca6-4bfb-8ec4-a991054350c5</plugininstanceid> <plugininstanceid-type>java.lang.String</plugininstanceid-type> <plugintypeid>com.joaomgcd.autonotification.intent.IntentNotification</plugintypeid> <plugintypeid-type>java.lang.String</plugintypeid-type> </Vals> </Bundle> <Str sr="arg1" ve="3">com.joaomgcd.autonotification</Str> <Str sr="arg2" ve="3">com.joaomgcd.autonotification.activity.ActivityConfigNotify</Str> <Int sr="arg3" val="0"/> <Int sr="arg4" val="1"/> </Action> <Action sr="act11" ve="7"> <code>559</code> <Str sr="arg0" ve="3">Go</Str> <Str sr="arg1" ve="3">default:default</Str> <Int sr="arg2" val="3"/> <Int sr="arg3" val="5"/> <Int sr="arg4" val="5"/> <Int sr="arg5" val="1"/> <Int sr="arg6" val="0"/> <Int sr="arg7" val="0"/> </Action> <Action sr="act12" ve="7"> <code>455</code> <Str sr="arg0" ve="3">sync/recordings/%filename</Str> <Int sr="arg1" val="0"/> <Int sr="arg2" val="0"/> <Int sr="arg3" val="0"/> <Int sr="arg4" val="0"/> </Action> <Action sr="act13" ve="7"> <code>38</code> </Action> <Action sr="act2" ve="7"> <code>657</code> </Action> <Action sr="act3" ve="7"> <code>559</code> <Str sr="arg0" ve="3">Done</Str> <Str sr="arg1" ve="3">default:default</Str> <Int sr="arg2" val="3"/> <Int sr="arg3" val="5"/> <Int sr="arg4" val="5"/> <Int sr="arg5" val="1"/> <Int sr="arg6" val="0"/> <Int sr="arg7" val="0"/> </Action> <Action sr="act4" ve="7"> <code>2046367074</code> <Bundle sr="arg0"> <Vals sr="val"> <App><null></App> <App-type>java.lang.String</App-type> <CancelAll>false</CancelAll> <CancelAll-type>java.lang.Boolean</CancelAll-type> <CancelPersistent>false</CancelPersistent> 
<CancelPersistent-type>java.lang.Boolean</CancelPersistent-type> <CaseinsensitiveApp>false</CaseinsensitiveApp> <CaseinsensitiveApp-type>java.lang.Boolean</CaseinsensitiveApp-type> <CaseinsensitivePackage>false</CaseinsensitivePackage> <CaseinsensitivePackage-type>java.lang.Boolean</CaseinsensitivePackage-type> <CaseinsensitiveText>false</CaseinsensitiveText> <CaseinsensitiveText-type>java.lang.Boolean</CaseinsensitiveText-type> <CaseinsensitiveTitle>false</CaseinsensitiveTitle> <CaseinsensitiveTitle-type>java.lang.Boolean</CaseinsensitiveTitle-type> <ExactApp>false</ExactApp> <ExactApp-type>java.lang.Boolean</ExactApp-type> <ExactPackage>false</ExactPackage> <ExactPackage-type>java.lang.Boolean</ExactPackage-type> <ExactText>false</ExactText> <ExactText-type>java.lang.Boolean</ExactText-type> <ExactTitle>false</ExactTitle> <ExactTitle-type>java.lang.Boolean</ExactTitle-type> <InterceptApps><StringArray sr=""/></InterceptApps> <InterceptApps-type>[Ljava.lang.String;</InterceptApps-type> <InvertApp>false</InvertApp> <InvertApp-type>java.lang.Boolean</InvertApp-type> <InvertPackage>false</InvertPackage> <InvertPackage-type>java.lang.Boolean</InvertPackage-type> <InvertText>false</InvertText> <InvertText-type>java.lang.Boolean</InvertText-type> <InvertTitle>false</InvertTitle> <InvertTitle-type>java.lang.Boolean</InvertTitle-type> <OtherId><null></OtherId> <OtherId-type>java.lang.String</OtherId-type> <OtherPackage><null></OtherPackage> <OtherPackage-type>java.lang.String</OtherPackage-type> <OtherTag><null></OtherTag> <OtherTag-type>java.lang.String</OtherTag-type> <PackageName><null></PackageName> <PackageName-type>java.lang.String</PackageName-type> <RegexApp>false</RegexApp> <RegexApp-type>java.lang.Boolean</RegexApp-type> <RegexPackage>false</RegexPackage> <RegexPackage-type>java.lang.Boolean</RegexPackage-type> <RegexText>false</RegexText> <RegexText-type>java.lang.Boolean</RegexText-type> <RegexTitle>false</RegexTitle> 
<RegexTitle-type>java.lang.Boolean</RegexTitle-type> <Text><null></Text> <Text-type>java.lang.String</Text-type> <Title><null></Title> <Title-type>java.lang.String</Title-type> <com.twofortyfouram.locale.intent.extra.BLURB>Id: my-recording</com.twofortyfouram.locale.intent.extra.BLURB> <com.twofortyfouram.locale.intent.extra.BLURB-type>java.lang.String</com.twofortyfouram.locale.intent.extra.BLURB-type> <net.dinglisch.android.tasker.RELEVANT_VARIABLES><StringArray sr=""><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES0>%err Error Code Only available if you select &lt;b&gt;Continue Task After Error&lt;/b&gt; and the action ends in error</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES0><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES1>%errmsg Error Message Only available if you select &lt;b&gt;Continue Task After Error&lt;/b&gt; and the action ends in error</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES1></StringArray></net.dinglisch.android.tasker.RELEVANT_VARIABLES> <net.dinglisch.android.tasker.RELEVANT_VARIABLES-type>[Ljava.lang.String;</net.dinglisch.android.tasker.RELEVANT_VARIABLES-type> <net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS>notificaitionid plugininstanceid plugintypeid </net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS> <net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS-type>java.lang.String</net.dinglisch.android.tasker.extras.VARIABLE_REPLACE_KEYS-type> <net.dinglisch.android.tasker.subbundled>true</net.dinglisch.android.tasker.subbundled> <net.dinglisch.android.tasker.subbundled-type>java.lang.Boolean</net.dinglisch.android.tasker.subbundled-type> <notificaitionid>my-recording</notificaitionid> <notificaitionid-type>java.lang.String</notificaitionid-type> <plugininstanceid>da51b00c-7f2a-483d-864c-7fee8ac384aa</plugininstanceid> <plugininstanceid-type>java.lang.String</plugininstanceid-type> <plugintypeid>com.joaomgcd.autonotification.intent.IntentCancelNotification</plugintypeid> 
<plugintypeid-type>java.lang.String</plugintypeid-type> </Vals> </Bundle> <Str sr="arg1" ve="3">com.joaomgcd.autonotification</Str> <Str sr="arg2" ve="3">com.joaomgcd.autonotification.activity.ActivityConfigCancelNotification</Str> <Int sr="arg3" val="0"/> <Int sr="arg4" val="1"/> </Action> <Action sr="act5" ve="7"> <code>43</code> </Action> <Action sr="act6" ve="7"> <code>394</code> <Bundle sr="arg0"> <Vals sr="val"> <net.dinglisch.android.tasker.RELEVANT_VARIABLES><StringArray sr=""><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES0>%current_time 00. Current time </_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES0><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES1>%dt_millis 1. MilliSeconds Milliseconds Since Epoch</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES1><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES2>%dt_seconds 2. Seconds Seconds Since Epoch</_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES2><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES3>%dt_day_of_month 3. Day Of Month </_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES3><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES4>%dt_month_of_year 4. Month Of Year </_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES4><_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES5>%dt_year 5. 
Year </_array_net.dinglisch.android.tasker.RELEVANT_VARIABLES5></StringArray></net.dinglisch.android.tasker.RELEVANT_VARIABLES> <net.dinglisch.android.tasker.RELEVANT_VARIABLES-type>[Ljava.lang.String;</net.dinglisch.android.tasker.RELEVANT_VARIABLES-type> </Vals> </Bundle> <Int sr="arg1" val="1"/> <Int sr="arg10" val="0"/> <Str sr="arg11" ve="3"/> <Str sr="arg12" ve="3"/> <Str sr="arg2" ve="3"/> <Str sr="arg3" ve="3"/> <Str sr="arg4" ve="3"/> <Str sr="arg5" ve="3">yyyy_MM_dd_HH_mm_ss</Str> <Str sr="arg6" ve="3"/> <Str sr="arg7" ve="3">current_time</Str> <Int sr="arg8" val="0"/> <Int sr="arg9" val="0"/> </Action> <Action sr="act7" ve="7"> <code>547</code> <Str sr="arg0" ve="3">%filename</Str> <Str sr="arg1" ve="3">%current_time.mp4</Str> <Int sr="arg2" val="0"/> <Int sr="arg3" val="0"/> <Int sr="arg4" val="0"/> <Int sr="arg5" val="3"/> <Int sr="arg6" val="1"/> </Action> <Action sr="act8" ve="7"> <code>547</code> <Str sr="arg0" ve="3">%RECORDING</Str> <Str sr="arg1" ve="3">1</Str> <Int sr="arg2" val="0"/> <Int sr="arg3" val="0"/> <Int sr="arg4" val="0"/> <Int sr="arg5" val="3"/> <Int sr="arg6" val="1"/> </Action> <Action sr="act9" ve="7"> <code>548</code> <Str sr="arg0" ve="3">%filename</Str> <Int sr="arg1" val="0"/> <Str sr="arg10" ve="3"/> <Int sr="arg11" val="1"/> <Int sr="arg12" val="0"/> <Str sr="arg13" ve="3"/> <Int sr="arg14" val="0"/> <Str sr="arg15" ve="3"/> <Int sr="arg2" val="0"/> <Str sr="arg3" ve="3"/> <Str sr="arg4" ve="3"/> <Str sr="arg5" ve="3"/> <Str sr="arg6" ve="3"/> <Str sr="arg7" ve="3"/> <Str sr="arg8" ve="3"/> <Int sr="arg9" val="1"/> </Action> </Task> </TaskerData>
It looks like there are plenty of things I can do by voice. If I can talk, then I can record a braindump. If I can't talk but I can listen to things, then Emacs TV might be a good choice. If I want to read, I can read webpages or e-books. If my hands are busy, I can still add items to my grocery list or my Orgzly notebook. I just need to practice.
I can experiment with ARIA labels or Web Speech API interfaces on a simpler website, since emacs.tv is a bit complicated. If that doesn't let me do the speech interfaces I'm thinking of, then I might need to look into making a simple Android app.
I'd like to learn more about Orgzly Revived intents. At some point, I should probably learn more about Android programming too. There are a bunch of tweaks I might like to make to Orgzly Revived and the Emacs port of Android.
Also somewhat tempted by the idea of adding voice control or voice input to Emacs and/or Linux. If I'm on my computer already, I can usually just type, but it might be handy for using it hands-free while I'm in the kitchen. Besides, exploring accessibility early will also probably pay off when it comes to age-related changes. There's the ffmpeg+Whisper approach, there's a more sophisticated dictation mode with a voice cursor, and there are some Emacs tools for working with Talon or Dragonfly… There's been a lot of work in this area, so I might be able to find something that fits.
Promising!
You can e-mail me at [email protected].
]]>Here's what it looks like when I have the post, the transcript, and the annotated PDF.
Here's what I needed to implement my-audio-braindump-from-whisperx-json (plus some code from my previous audio braindump workflow):
(defun my-whisperx-word-list (file)
  "Return the flat list of word alists from the WhisperX JSON in FILE.
Assumes FILE follows the WhisperX output schema: a top-level
`segments' array whose entries each contain a `words' array —
TODO confirm against the WhisperX version in use."
  (let* ((json-object-type 'alist)
         (json-array-type 'list))
    ;; Concatenate the per-segment word lists into one list.
    (seq-mapcat (lambda (seg) (alist-get 'words seg))
                (alist-get 'segments (json-read-file file)))))
;; (seq-take (my-whisperx-word-list (my-latest-file "~/sync/recordings" "\\.json")) 10)

(defun my-whisperx-insert-word-list (words)
  "Inserts WORDS with text properties.
Each element of WORDS is a word alist as returned by
`my-whisperx-word-list'; timing data is attached to the inserted
text via `subed-word-data--add-word-properties'."
  (require 'subed-word-data)
  (mapc (lambda (word)
          (let ((start (point)))
            (insert (alist-get 'word word))
            ;; Carry the word's timing/confidence metadata as text
            ;; properties so later steps can recover timestamps.
            (subed-word-data--add-word-properties start (point) word)
            (insert " ")))
        words))

(defun my-audio-braindump-turn-sections-into-headings ()
  "Convert spoken \"START SECTION … STOP SECTION\" markers into Org headings.
Each marker becomes a level-3 heading; the heading's START
property records the timestamp of the first following word that
carries `subed-word-data-start' (if any)."
  (interactive)
  (goto-char (point-min))
  (while (re-search-forward "START SECTION \\(.+?\\) STOP SECTION" nil t)
    (replace-match
     (save-match-data
       (format "\n*** %s\n"
               (save-match-data
                 ;; Strip stray leading/trailing commas or periods that
                 ;; speech recognition tends to add around the title.
                 (string-trim
                  (replace-regexp-in-string "^[,\\.]\\|[,\\.]$" ""
                                            (match-string 1))))))
     nil t)
    ;; Look ahead for the next word with timing data and store its
    ;; start time on the new heading as an Org START property.
    (let ((prop-match (save-excursion
                        (text-property-search-forward 'subed-word-data-start))))
      (when prop-match
        (org-entry-put (point) "START"
                       (format-seconds "%02h:%02m:%02s"
                                       (prop-match-value prop-match)))))))

(defun my-audio-braindump-split-sentences ()
  "Put each sentence on its own line.
Matches a lowercase letter followed by a period and trailing
whitespace, then replaces the trailing whitespace with a newline."
  (interactive)
  (goto-char (point-min))
  ;; NOTE(review): the search string here was reconstructed from a
  ;; line-wrapped source — confirm it matches the intended
  ;; sentence-boundary pattern.
  (while (re-search-forward "[a-z]\\. " nil t)
    (replace-match (concat (string-trim (match-string 0)) "\n") )))

(defun my-audio-braindump-restructure ()
  "Run the full cleanup pipeline on the current buffer.
Fixes common transcription errors, switches to Org mode, inserts
alignment breaks, turns section markers into headings, splits
sentences, and removes filler words at the start."
  (interactive)
  (goto-char (point-min))
  (my-subed-fix-common-errors)
  (org-mode)
  (my-audio-braindump-prepare-alignment-breaks)
  (my-audio-braindump-turn-sections-into-headings)
  (my-audio-braindump-split-sentences)
  (goto-char (point-min))
  (my-remove-filler-words-at-start))

(defun my-audio-braindump-from-whisperx-json (file)
  "Load the WhisperX JSON in FILE into a *Words* buffer and restructure it.
Interactively, prompt for a .json file under ~/sync/recordings/."
  (interactive (list (read-file-name "JSON: " "~/sync/recordings/" nil nil nil
                                     (lambda (f) (string-match "\\.json\\'" f)))))
  ;; put them all into a buffer
  (with-current-buffer (get-buffer-create "*Words*")
    (erase-buffer)
    ;; Start from fundamental-mode; `my-audio-braindump-restructure'
    ;; switches to org-mode itself.
    (fundamental-mode)
    (my-whisperx-insert-word-list (my-whisperx-word-list file))
    (my-audio-braindump-restructure)
    (goto-char (point-min))
    (switch-to-buffer (current-buffer))))

(defun my-audio-braindump-process-text (file)
  "Restructure the plain-text transcript FILE in place and save it.
Interactively, prompt for a .txt file under ~/sync/recordings/.
Unlike `my-audio-braindump-from-whisperx-json', this edits the
file's own buffer and writes the result back to disk."
  (interactive (list (read-file-name "Text: " "~/sync/recordings/" nil nil nil
                                     (lambda (f) (string-match "\\.txt\\'" f)))))
  (with-current-buffer (find-file-noselect file)
    (my-audio-braindump-restructure)
    (save-buffer)))
;; (my-audio-braindump-from-whisperx-json (my-latest-file "~/sync/recordings" "\\.json"))
Ideas for next steps:
You can e-mail me at [email protected].
]]>
I added an emacsconf-pad-append-text function to
emacsconf-pad.el that uses the appendText function.
You can e-mail me at [email protected].
]]>Added a quick video!
Audio recording is handy for capturing thoughts as I wait, walk around, or do chores. But my wireless earbuds don't have a good mic, I rarely got back to reviewing the wall of text, and I don't trust speech recognition to catch all my words.
Here's a new brain-dumping workflow that I've been experimenting with, though. I use a lapel mic to record in my phone. Google Recorder gives me an audio file as well as a rough transcript right away.
I copy those with Syncthing.
If I use keywords like "start" or "stop" along with things like "topic", "reminder", or "summary", then I can put those on separate lines automatically (my-transcript-prepare-alignment-breaks).
... News. Miscellaneous little tasks that he doing. I do want to finish that blog post about the playlist Just so that it's out. Something else that people can, you know, refer to or that I can refer to. Uh, And at some point I want to think about, This second brain stuff. So, right now, What's my current state? Uh, START CHAPTER second brain STOP CHAPTER Right now, I dumped everything into originally. In my inbox, if I come across an interesting website. As usually in my phone. So then I share it. As. Something links at those or four none. Uh, into my inbox. ...
I use subed-align to get the timestamps, and add the headings.
00:20:18.680 --> 00:20:24.679 So, right now, What's my current state? Uh, NOTE CHAPTER: second brain 00:20:24.680 --> 00:20:30.719 START CHAPTER second brain STOP CHAPTER
I can then create an Org Mode TODO item with a quick hyperlinked summary as well as my transcript.
I can jump to the audio if there are misrecognized words.
I can use subed-waveform to tweak the start and end times. (subed-waveform-show-current, then left-clicking to set the start or right-clicking to set the end, or using keybindings to adjust the start/stop).
Someday I'll write code to send sections to a better speech recognition engine or to AI. In the meantime, this is pretty good.
Here's how the code works:
There are several things I want to do while dictating.
By analyzing the text, I might be able to make my own command system.
So far, for starting keywords, I can use "start", "begin", or "open". I pair that with one of these part keywords:
Then the code can extract the text until the matching "stop/close/end
<part>", assuming it happens within 50 words or so.
(my-transcript-close-keyword-distance-words)
Sometimes keywords get misrecognized. "Begin summary" sometimes becomes "again summary" or "the game summary". I could try "open" and "close". Commercial dictation programs like Dragon NaturallySpeaking use "open" and "close" for punctuation, so that would probably work fine. "Start" works well, but "end" doesn't because it can be confused with "and".
Sometimes an extra word sneaks in, either because I say it or because
the speech recognition tries too hard to guess. "Begin reminder" ends
up as "Begin a reminder." I changed from using regular expressions
that searched for just start-keyword + part-keyword to one that looked
for the start of the keyword phrase and then looked for the next
keyword within the next X words. (my-transcript-scan-for-part-keyword)
(defvar my-transcript-open-keywords '("start" "begin" "open")
  "Words that can open a keyword phrase, as in \"start summary\".")
(defvar my-transcript-close-keywords '("stop" "end" "close")
  "Words that can close a keyword phrase, as in \"stop summary\".")
(defvar my-transcript-part-keywords '("summary" "chapter" "topic" "section" "action" "idea" "journal" "reminder" "command" "interruption" "note" "next step" "next steps" "tags" "tag" "keywords" "keyword")
  "Part keywords that can follow an open or close keyword.")
(defvar my-transcript-part-keyword-distance-words 2 "Number of words to scan for part keyword.")
(defvar my-transcript-close-keyword-distance-words 50
  "Number of words to scan for stop keyword.
Put the keywords on the same line if found.")
(defun my-transcript-scan-for-part-keyword (before-part &optional part-keywords within-distance before-distance)
  "Look for BEFORE-PART followed by PART-KEYWORDS.
There might be WITHIN-DISTANCE words between BEFORE-PART and
PART-KEYWORDS, and the pair might be within BEFORE-DISTANCE from point.
Distances are in words.
Return (start end before-part part) if found, nil otherwise."
  ;; Normalize BEFORE-PART: a symbol selects one of the keyword lists.
  (setq before-part (pcase before-part
                      ('start my-transcript-open-keywords)
                      ('stop my-transcript-close-keywords)
                      ('nil (append my-transcript-open-keywords my-transcript-close-keywords))
                      (_ before-part)))
  (setq part-keywords (or part-keywords my-transcript-part-keywords))
  (when (stringp part-keywords) (setq part-keywords (list part-keywords)))
  (setq within-distance (or within-distance my-transcript-part-keyword-distance-words))
  ;; BEFORE-DISTANCE of t means "search to the end of the buffer".
  (setq before-distance (if (eq before-distance t) (point-max)
                          (or before-distance my-transcript-close-keyword-distance-words)))
  (let (result start end
               (before-point (save-excursion (forward-word before-distance) (point)))
               before-word part-word)
    (save-excursion
      ;; Case 1: point is already on an opening keyword.
      (when (looking-at (regexp-opt before-part))
        (setq before-word (match-string 0) start (match-beginning 0))
        (when (re-search-forward (regexp-opt part-keywords)
                                 (save-excursion (forward-word within-distance) (point))
                                 t)
          (setq result (list start (match-end 0) before-word (match-string 0)))))
      ;; Case 2: scan forward for the next opening keyword within range.
      (while (and (not result)
                  (re-search-forward (regexp-opt before-part) before-point t))
        (setq before-word (match-string 0) start (match-beginning 0))
        (when (re-search-forward (regexp-opt part-keywords)
                                 (save-excursion (forward-word within-distance) (point))
                                 t)
          (setq result (list start (match-end 0) before-word (match-string 0)))))
      ;; NOTE(review): this goto-char is inside save-excursion, so point is
      ;; restored on exit; callers position themselves from RESULT instead.
      (when result (goto-char (elt result 1)))
      result)))
(ert-deftest my-transcript-scan-for-part-keyword ()
  ;; Fixed: use ERT's `should'/`equal' instead of Buttercup's `expect',
  ;; which is not defined in plain ERT runs.
  (with-temp-buffer
    (insert "some text start a reminder hello world stop there and do something stop reminder more text")
    (goto-char (point-min))
    (let ((result (my-transcript-scan-for-part-keyword 'start nil)))
      (should (equal (elt result 2) "start"))
      (should (equal (elt result 3) "reminder")))
    (let ((result (my-transcript-scan-for-part-keyword 'stop "reminder")))
      (should (equal (elt result 2) "stop"))
      (should (equal (elt result 3) "reminder")))))
Now I can use that to scan through the text. I want to put commands on
their own lines so that subed-align will get the timestamp for that
segment and so that the commands are easier to parse.
I also want to detect "oops" and split things up so that the start of
that line matches my correction after the "oops". I use
my-subed-split-oops for that, which I should write about in another
post. By putting the oops fragment on its own line, I can use
subed-align to get a timestamp for just that segment. Then I can
use flush-lines to get rid of anything with "oops" in it, or I
can even remove the subtitle and use subed-record-compile-media to
compile audio/video without that segment, if I want to use the audio
without rerecording it.
And the way I can help is by jotting words down in a mind map, typing her sentences. Oops typing, her sentences And generating, follow-up questions.
I also all-caps the keyword phrases so that they're easier to see when skimming the text file.
(defun my-transcript-prepare-alignment-breaks ()
  "Split lines in preparation for forced alignment with aeneas.
Split \"oops\" so that it's at the end of the line
and the previous line starts with roughly the same words
as the next line, for easier removal.
Add a linebreak before \"begin/start\" followed by `my-transcript-part-keywords'.
Add a linebreak after \"stop\" followed by `my-transcript-part-keywords'.
Look for begin keyword ... stop keyword with at most
`my-transcript-part-keyword-distance-words' between them and
put them on one line."
  (interactive)
  ;; Fixed: the let previously bound `close-result' while the body used
  ;; `stop-result', so the setq below leaked a dynamic global variable.
  (let ((case-fold-search t) result stop-result)
    (my-split-oops)
    ;; break "begin/start keyword"
    (goto-char (point-min))
    (while (setq result (my-transcript-scan-for-part-keyword 'start nil nil t))
      (goto-char (car result))
      (delete-region (car result) (elt result 1))
      (insert "\n" (upcase (concat (elt result 2) " " (elt result 3))) "\n"))
    ;; break stop
    (goto-char (point-min))
    (while (setq result (my-transcript-scan-for-part-keyword 'stop nil nil t))
      (goto-char (car result))
      (delete-region (car result) (elt result 1))
      (insert (upcase (concat (elt result 2) " " (elt result 3))) "\n"))
    ;; try to get start and end sections on one line
    (goto-char (point-min))
    (while (setq result (my-transcript-scan-for-part-keyword 'start nil nil t))
      (goto-char (elt result 1))
      (setq stop-result (my-transcript-scan-for-part-keyword 'stop (elt result 3)))
      (if stop-result
          (progn
            ;; Join everything between start and stop onto one line.
            (goto-char (car stop-result))
            (while (re-search-backward " *\n+ *" (car result) t)
              (replace-match " ")))
        ;; no stop keyword; are we on an empty line?
        ;; If so, just merge it with the next one
        (when (looking-at "\n+ *") (replace-match " "))))
    ;; remove empty lines
    (goto-char (point-min))
    (when (looking-at "\n+") (replace-match ""))
    (while (re-search-forward "\n\n+" nil t) (replace-match "\n"))
    (goto-char (point-min))
    (while (re-search-forward " *\n *" nil t) (replace-match "\n"))))
(ert-deftest my-transcript-prepare-alignment-breaks ()
  ;; Fixed: use ERT's `should'/`equal' instead of Buttercup's `expect'.
  (with-temp-buffer
    (insert "some text start a reminder hello world stop there and do something stop reminder more text")
    (goto-char (point-min))
    (my-transcript-prepare-alignment-breaks)
    (should (equal (buffer-string)
                   "some text START REMINDER hello world stop there and do something STOP REMINDER more text"))))
subed-align gives me a VTT subtitle file with timestamps and text. I
add NOTE comments with the keywords and make subed: links to the
timestamps using the ol-subed.el that I just added.
(defun my-transcript-get-subtitle-note-based-on-keywords (sub-text) (let ((case-fold-search t)) (when (string-match (concat "^" (regexp-opt my-transcript-open-keywords) " \\(" (regexp-opt my-transcript-part-keywords) "\\) \\(.+?\\)\\( " (regexp-opt my-transcript-close-keywords) " " (regexp-opt my-transcript-part-keywords) "\\)?$") sub-text) (concat (match-string 1 sub-text) ": " (match-string 2 sub-text))))) (ert-deftest my-transcript-get-subtitle-note-based-on-keywords () (expect (my-transcript-get-subtitle-note-based-on-keywords "BEGIN NEXT STEPS . Think about how dictation helps me practice slower speed. CLOSE NEXT STEPS") :to-equal "NEXT STEPS: . Think about how dictation helps me practice slower speed.") (expect (my-transcript-get-subtitle-note-based-on-keywords "START SUMMARY hello world STOP SUMMARY") :to-equal "SUMMARY: hello world") (expect (my-transcript-get-subtitle-note-based-on-keywords "START CHAPTER hello world again") :to-equal "CHAPTER: hello world again") )
The last step is to take the list of subtitles and format it into the subtree.
;; todo: sort the completion? https://emacs.stackexchange.com/questions/55502/list-files-in-directory-in-reverse-order-of-date
;;
(defun my-transcript-insert-subtitles-as-org-tree (vtt-filename)
  "Insert the subtitles from VTT-FILENAME as an Org TODO subtree.
Creates a \"* TODO Review braindump ...\" heading with links to the
sibling .vtt/.txt/.m4a files, then a \"** Transcript\" subtree with a
\"***\" heading for each subtitle that has a keyword note.  Notes that
look like commands (recognize/outline/time/high/low/tags) adjust the
entry's tags, priority, or timestamps.  Chapter links are collected and
inserted at point afterwards."
  (interactive (list (read-file-name "VTT: "
                                     (expand-file-name "./" my-phone-recording-dir)
                                     nil t nil
                                     (lambda (s) (string-match "\\.vtt$" s)))))
  (let* ((subtitles
          ;; Fill in slot 4 (the NOTE comment) from the keyword phrases
          ;; when the parsed subtitle doesn't already have one.
          (mapcar (lambda (sub)
                    (unless (elt sub 4)
                      (setf (elt sub 4)
                            (my-transcript-get-subtitle-note-based-on-keywords (elt sub 3))))
                    sub)
                  (subed-parse-file vtt-filename)))
         (start-date (my-transcript-get-file-start-time vtt-filename))
         ;; NOTE(review): `tags' is bound but never used below - confirm.
         chapters tags start-of-entry)
    (setq start-of-entry (point))
    (insert (format "* TODO Review braindump from %s :braindump:\n\n"
                    (file-name-base vtt-filename)))
    (org-entry-put (point) "CREATED"
                   (concat "["
                           (format-time-string
                            (cdr org-timestamp-formats)
                            (my-transcript-get-file-start-time
                             (file-name-nondirectory vtt-filename)))
                           "]"))
    ;; Links to the related VTT, text, and audio files.
    (insert (format "%s - %s - %s\n"
                    (org-link-make-string
                     (concat "file:" (file-name-sans-extension vtt-filename) ".vtt") "VTT")
                    (org-link-make-string
                     (concat "file:" (file-name-sans-extension vtt-filename) ".txt") "Text")
                    (org-link-make-string
                     (concat "file:" (file-name-sans-extension vtt-filename) ".m4a") "Audio")))
    (save-excursion
      (insert "** Transcript\n")
      ;; add each subtitle; add an ID in case we change the title
      (mapc (lambda (sub)
              (when (elt sub 4)
                (let ((note (my-transcript-get-subtitle-note-based-on-keywords (elt sub 3))))
                  (insert (concat "*** " note " "
                                  (org-link-make-string
                                   (format "subed:%s::%s" vtt-filename
                                           (my-msecs-to-timestamp (elt sub 1)))
                                   "VTT")
                                  "\n\n"))
                  ;; CREATED = recording start time + subtitle offset (msecs -> secs).
                  (org-entry-put (point) "CREATED"
                                 (concat "["
                                         (format-time-string
                                          (cdr org-timestamp-formats)
                                          (time-add start-date
                                                    (seconds-to-time (/ (elt sub 1) 1000.0))))
                                         "]"))
                  ;; NOTE(review): (elt sub 2) is the subtitle's *stop* time in
                  ;; subed's (id start stop text comment) layout - confirm the
                  ;; START property shouldn't use (elt sub 1) instead.
                  (org-entry-put (point) "START" (my-msecs-to-timestamp (elt sub 2)))
                  (when (elt sub 4)
                    ;; "command: ... recognize" tags the whole entry.
                    (when (string-match "command: .*recognize" (elt sub 4))
                      (save-excursion
                        ;; TODO: scope this to just the section someday
                        (goto-char start-of-entry)
                        (org-set-tags (append (list "recognize") (org-get-tags)))))
                    (when (string-match "command: .*outline" (elt sub 4))
                      (save-excursion
                        (goto-char start-of-entry)
                        (org-set-tags (append (list "outline") (org-get-tags)))))
                    ;; "time ..." inserts an absolute timestamp for this subtitle.
                    ;; NOTE(review): integer division here vs 1000.0 above - confirm.
                    (when (string-match "^time" (elt sub 4))
                      (insert "["
                              (org-format-time-string
                               (cdr org-timestamp-formats)
                               (time-add start-date
                                         (seconds-to-time (/ (elt sub 1) 1000))))
                              "]\n"))
                    ;; "command: ... high/low" sets the entry priority.
                    (when (string-match "command: .+\\(high\\|low\\)" (elt sub 4))
                      (save-excursion
                        (goto-char start-of-entry)
                        (org-priority
                         (if (string= (downcase (match-string 1)) "high") ?A ?C))))
                    ;; "tags: a b" / "keywords: a b" add entry tags.
                    (when (string-match "\\(?:tags?\\|keywords?\\): \\(.+\\)" (elt sub 4))
                      (save-excursion
                        (goto-char start-of-entry)
                        (org-set-tags (append (split-string (match-string 1) " ")
                                              (org-get-tags))))))
                  ;; Remember a chapter-list line linking to the new entry's ID.
                  (add-to-list 'chapters
                               (format "- %s (%s)"
                                       (org-link-make-string
                                        (concat "id:" (org-id-get-create)) note)
                                       (org-link-make-string
                                        (format "subed:%s::%s" vtt-filename
                                                (my-msecs-to-timestamp (elt sub 1)))
                                        "VTT")))))
              (insert (elt sub 3) "\n"))
            subtitles))
    ;; Chapter list goes at point (before the transcript subtree).
    (when chapters (insert (string-join (nreverse chapters) "\n") "\n"))))
(defun my-transcript-get-file-start-time (filename)
  "Determine the recording start time from FILENAME.
Handles \"2023-01-02T03.04\"-style names directly, and
\"Mon ... at 03-04\"-style names (Google Recorder) by combining the
day/hour/minute with the file's modification time.  Returns nil when
the name matches neither pattern."
  (setq filename (file-name-base filename))
  (cond
   ;; ISO-ish prefix: parse it directly.
   ((string-match "^\\([0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]T[0-9][0-9]\\.[0-9][0-9]\\)" filename)
    (date-to-time (replace-regexp-in-string "\\." ":" (match-string 0 filename))))
   ;; "<Day>... at HH-MM" prefix, optionally preceded by "Copy of ".
   ((string-match "^\\(?:Copy of \\)?\\([^ ][^ ][^ ]\\)[^ ]+ at \\([0-9]+\\)-\\([0-9]+\\)" filename)
    (let* ((day (match-string 1 filename))
           (hour (match-string 2 filename))
           (min (match-string 3 filename))
           ;; NOTE(review): `filename' was stripped to its base name above,
           ;; so `file-attributes' only finds the file if it is in the
           ;; current directory - confirm.
           (changed-time (file-attribute-modification-time (file-attributes filename)))
           (decoded-time (decode-time changed-time)))
      ;; get the day on or before changed-time
      (if (string= (format-time-string "%a" changed-time) day)
          (encode-time (append (list 0 (string-to-number min) (string-to-number hour))
                               (seq-drop decoded-time 3)))
        ;; synchronized maybe within the week after
        (org-read-date t t (concat "-" day " " hour ":" min)))))))
So now we put that all together: rename the file using the calculated start time, prepare the alignment breaks, align the file to get the timestamps, and add the subtree to an Org file.
(defvar my-transcript-braindump-file "~/sync/orgzly/braindump.org") (defun my-transcript-make-todo (text-file &optional force) "Add TEXT-FILE as a TODO." (interactive (list (buffer-file-name) current-prefix-arg)) ;; rename the files to use the timestamps (unless (string-match "^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" (file-name-base text-file)) (setq text-file (my-transcript-rename-files-based-on-time text-file))) (let* ((recording (concat (file-name-sans-extension text-file) ".m4a")) (start (my-transcript-get-file-start-time text-file)) (vtt (concat (file-name-sans-extension text-file) ".vtt")) chapters (title (concat "Review braindump " text-file)) existing) ;; check if already exists (with-current-buffer (find-file-noselect my-transcript-braindump-file) (save-excursion (goto-char (point-min)) (setq existing (org-find-exact-headline-in-buffer title)))) (if (and existing (not force)) (progn (message "Going to existing heading") (org-goto-marker-or-bmk existing)) (if (or (null my-transcript-last-processed-time) (time-less-p my-transcript-last-processed-time start)) (customize-save-variable 'my-transcript-last-processed-time start)) (find-file text-file) (my-transcript-prepare-alignment-breaks) (save-buffer) (when (file-exists-p vtt) (delete-file vtt)) (when (get-file-buffer vtt) (kill-buffer (get-file-buffer vtt))) (subed-align recording text-file "VTT") (when (get-file-buffer vtt) (kill-buffer (get-file-buffer vtt))) (find-file my-transcript-braindump-file) (goto-char (point-min)) (if existing (progn (org-goto-marker-or-bmk existing) (delete-region (point) (org-end-of-subtree))) (org-next-visible-heading 1)) (my-transcript-insert-subtitles-as-org-tree vtt))))
I want to process multiple files in one batch.
(defun my-transcript-process (files &optional force) (interactive (list (cond ((and (derived-mode-p 'dired-mode) (dired-get-marked-files)) (dired-get-marked-files)) ((derived-mode-p 'dired-mode) (list (dired-get-filename))) ((string-match "\\.txt$" (buffer-file-name)) (list (buffer-file-name))) (t (read-file-name "Transcript: "))) current-prefix-arg)) (mapc (lambda (f) (when (string-match "txt" f) (my-transcript-make-todo f force))) files))
It would be nice to have it automatically keep track of the latest one
that's been processed, maybe via customize-save-variable. This still
needs some tinkering with.
(defcustom my-transcript-last-processed-time nil "The timestamp of the last processed transcript." :group 'sacha :type '(repeat integer)) (defun my-transcript-process-since-last () (interactive) (let ((files (seq-filter (lambda (f) (or (null my-transcript-last-processed-time) (time-less-p my-transcript-last-processed-time (my-transcript-get-file-start-time f)))) (directory-files my-phone-recording-dir 'full " at [0-9][0-9]-[0-9][0-9]\\.txt\\|^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]T[0-9][0-9]\\.[0-9][0-9]\\.txt")))) (mapc (lambda (f) (my-transcript-make-todo f) (let ((start (my-transcript-get-file-start-time f))) (if (time-less-p my-transcript-last-processed-time start) (setq my-transcript-last-processed-time start)))) files)) (customize-save-variable 'my-transcript-last-processed-time my-transcript-last-processed-time)) (defun my-transcript-rename-files-based-on-time (text-file) "Rename TEXT-FILE based on date. Return the new text file." (interactive (list (if (derived-mode-p 'dired-mode) (dired-get-filename) (buffer-file-name)))) (if (string-match "^[0-9][0-9][0-9][0-9]" text-file) text-file ; no change, already uses date (let* ((start (my-transcript-get-file-start-time (file-name-base text-file))) (new-base (format-time-string "%Y-%m-%dT%H.%M" start))) (if (file-exists-p (expand-file-name (concat new-base ".txt") (file-name-directory text-file))) (error "%s already exists" new-base) (dolist (ext '(".txt" ".m4a" ".vtt")) (if (file-exists-p (concat (file-name-sans-extension text-file) ext)) (rename-file (concat (file-name-sans-extension text-file) ext) (expand-file-name (concat new-base ext) (file-name-directory text-file))))) (expand-file-name (concat new-base ".txt") (file-name-directory text-file))))))
You can e-mail me at [email protected].
]]>This is a quick demonstration of using Deepgram's streaming API to do speech recognition live. It isn't as accurate as OpenAI Whisper but since Whisper doesn't have a streaming API, it'll do for now. I can correct misrecognized words manually. I tend to talk really quickly, so it displays the words per minute in my modeline. I put the words into an Org Mode buffer so I can toggle headings with avy and cycle visibility. When I'm done, it saves the text, JSON, and WAV for further processing. I think it'll be handy to have a quick way to take live notes during interviews or when I'm thinking out loud. Could be fun!
I'm still getting some weirdness when the mode turns on when I don't
expect it, so that's something to look into. Maybe I won't use it as a
mode for now. I'll just use my-live-speech-start and
my-live-speech-stop.
(defvar my-live-speech-buffer "*Speech*"
  "Buffer that displays the live captions.")
(defvar my-live-speech-process nil
  "Process running the Deepgram streaming script.")
(defvar my-live-speech-output-buffer "*Speech JSON*"
  "Buffer that accumulates the raw JSON output from the process.")
(defvar my-live-speech-functions '(my-live-speech-display-in-speech-buffer my-live-speech-display-wpm my-live-speech-append-to-etherpad)
  "Functions to call with one argument, the recognition results.")
(defun my-live-speech-start ()
  "Turn on live captions."
  (interactive)
  (with-current-buffer (get-buffer-create my-live-speech-buffer)
    (unless (process-live-p my-live-speech-process)
      (let ((default-directory "~/proj/deepgram-live"))
        (message "%s" default-directory)
        (with-current-buffer (get-buffer-create my-live-speech-output-buffer)
          (erase-buffer))
        ;; NOTE(review): these two variables appear to be defined by the
        ;; WPM display code elsewhere - confirm.
        (setq my-live-speech-recent-words nil
              my-live-speech-wpm-string "READY ")
        ;; Fixed: this previously set `my-deepgram-process', so the
        ;; `process-live-p' check above and `my-live-speech-stop' never
        ;; saw the running process.
        (setq my-live-speech-process
              (make-process
               :command '("bash" "run.sh")
               :name "speech"
               :filter 'my-live-speech-json-filter
               :sentinel #'my-live-speech-process-sentinel
               :buffer my-live-speech-output-buffer)))
      (org-mode))
    (display-buffer (current-buffer))))
(defun my-live-speech-stop ()
  "Stop the live captioning process and clear the WPM display."
  (interactive)
  (if (process-live-p my-live-speech-process)
      (kill-process my-live-speech-process))
  (setq my-live-speech-wpm-string nil))
;; (define-minor-mode my-live-speech-mode
;;   "Show live speech and display WPM.
;; Need to check how to reliably turn this on and off."
;;   :global t :group 'sachac
;;   (if my-live-speech-mode
;;       (my-live-speech-start)
;;     (my-live-speech-stop)
;;     (setq my-live-speech-wpm-string nil)))
;; based on subed-mpv::client-filter
(defun my-live-speech-handle-json (line)
  "Process the JSON object in LINE.
Parses LINE as an alist and runs `my-live-speech-functions' on it."
  ;; Fixed: the body referenced the free variable `line' while the
  ;; parameter was named `line-object'; the names now agree.
  (run-hook-with-args 'my-live-speech-functions
                      (json-parse-string line :object-type 'alist)))
(defun my-live-speech-process-sentinel (proc event)
  "Stop live speech display when PROC's EVENT says it finished."
  (when (string-match "finished" event)
    (my-live-speech-stop)
    ;; (my-live-speech-mode -1)
    ))
(defun my-live-speech-json-filter (proc string)
  "Accumulate STRING from PROC and handle each complete JSON line."
  (when (buffer-live-p (process-buffer proc))
    (with-current-buffer (process-buffer proc)
      (let* ((proc-mark (process-mark proc))
             (moving (= (point) proc-mark)))
        ;; insert the output
        (save-excursion
          (goto-char proc-mark)
          (insert string)
          (set-marker proc-mark (point)))
        (if moving (goto-char proc-mark))
        ;; process and remove all complete lines of JSON (lines are complete if ending with \n)
        (let ((pos (point-min)))
          (while (progn (goto-char pos) (end-of-line) (equal (following-char) ?\n))
            (let* ((end (point))
                   (line (buffer-substring pos end)))
              (delete-region pos (+ end 1))
              (with-current-buffer (get-buffer my-live-speech-buffer)
                (my-live-speech-handle-json line)))))))))
Python code based on the Deepgram streaming test suite:
# Based on streaming-test-suite
# https://developers.deepgram.com/docs/getting-started-with-the-streaming-test-suite
#
# Streams microphone audio to Deepgram's live transcription websocket and
# prints transcripts (as JSON lines or plain text), saving the audio,
# transcripts, and word timing data when the stream ends.
import pyaudio
import asyncio
import json
import os
import websockets
from datetime import datetime
import wave
import sys

startTime = datetime.now()
key = os.environ['DEEPGRAM_API_KEY']
# NOTE(review): environment values are strings, so any non-empty setting
# (even "false") is truthy; only an unset variable uses the True default.
live_json = os.environ.get('LIVE_CAPTIONS_JSON', True)

all_mic_data = []       # raw microphone chunks, for saving a WAV at the end
all_transcripts = []    # final transcript strings
all_words = []          # per-word timing data from Deepgram

FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 8000

audio_queue = asyncio.Queue()
REALTIME_RESOLUTION = 0.250
SAMPLE_SIZE = 0


def save_info():
    """Write the captured audio (.wav), transcripts (.txt), and word
    timing data (.json) to files named after the session start time."""
    global SAMPLE_SIZE
    base = startTime.strftime('%Y%m%d%H%M')
    wave_file_path = os.path.abspath(f"{base}.wav")
    wave_file = wave.open(wave_file_path, "wb")
    wave_file.setnchannels(CHANNELS)
    wave_file.setsampwidth(SAMPLE_SIZE)
    wave_file.setframerate(RATE)
    wave_file.writeframes(b"".join(all_mic_data))
    wave_file.close()
    with open(f"{base}.txt", "w") as f:
        f.write("\n".join(all_transcripts))
    with open(f"{base}.json", "w") as f:
        f.write(json.dumps(all_words))
    if live_json:
        print(f'{{"msg": "🟢 Saved to {base}.txt , {base}.json , {base}.wav", "base": "{base}"}}')
    else:
        print(f"🟢 Saved to {base}.txt , {base}.json , {base}.wav")


# Used for microphone streaming only.
def mic_callback(input_data, frame_count, time_info, status_flag):
    """PyAudio stream callback: queue each audio chunk for the sender task."""
    audio_queue.put_nowait(input_data)
    return (input_data, pyaudio.paContinue)


async def run(key, method="mic", format="text", **kwargs):
    """Connect to Deepgram and run the sender, receiver, and microphone
    tasks until the stream closes.  KEY is the Deepgram API key."""
    deepgram_url = f'wss://api.deepgram.com/v1/listen?punctuate=true&smart_format=true&utterances=true&encoding=linear16&sample_rate=16000'
    async with websockets.connect(
        deepgram_url, extra_headers={"Authorization": "Token {}".format(key)}
    ) as ws:

        async def sender(ws):
            # Forward queued microphone chunks to Deepgram until the
            # connection closes, then send the CloseStream message.
            try:
                while True:
                    mic_data = await audio_queue.get()
                    all_mic_data.append(mic_data)
                    await ws.send(mic_data)
            except websockets.exceptions.ConnectionClosedOK:
                await ws.send(json.dumps({"type": "CloseStream"}))
                if live_json:
                    print('{"msg": "Closed."}')
                else:
                    print("Closed.")

        async def receiver(ws):
            global all_words
            """Print out the messages received from the server."""
            first_message = True
            first_transcript = True
            transcript = ""
            async for msg in ws:
                res = json.loads(msg)
                if first_message:
                    first_message = False
                try:
                    # handle local server messages
                    if res.get("msg"):
                        if live_json:
                            print(json.dumps(res))
                        else:
                            print(res["msg"])
                    if res.get("is_final"):
                        transcript = (
                            res.get("channel", {})
                            .get("alternatives", [{}])[0]
                            .get("transcript", "")
                        )
                        if transcript != "":
                            if first_transcript:
                                first_transcript = False
                            if live_json:
                                print(json.dumps(res.get("channel", {}).get("alternatives", [{}])[0]))
                            else:
                                print(transcript)
                            all_transcripts.append(transcript)
                            all_words = all_words + res.get("channel", {}).get("alternatives", [{}])[0].get("words", [])
                        # if using the microphone, close stream if user says "goodbye"
                        if method == "mic" and "goodbye" in transcript.lower():
                            await ws.send(json.dumps({"type": "CloseStream"}))
                            if live_json:
                                print('{"msg": "Done."}')
                            else:
                                print("Done.")
                    # handle end of stream
                    if res.get("created"):
                        save_info()
                except KeyError:
                    # FIX: this message previously contained a raw newline
                    # inside the f-string (a syntax error); use an explicit
                    # \n escape instead.
                    print(f"🔴 ERROR: Received unexpected API response!\n{msg}")

        # Set up microphone if streaming from mic
        async def microphone():
            audio = pyaudio.PyAudio()
            stream = audio.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                stream_callback=mic_callback,
            )
            stream.start_stream()
            global SAMPLE_SIZE
            SAMPLE_SIZE = audio.get_sample_size(FORMAT)
            while stream.is_active():
                await asyncio.sleep(0.1)
            stream.stop_stream()
            stream.close()

        functions = [
            asyncio.ensure_future(sender(ws)),
            asyncio.ensure_future(receiver(ws)),
        ]
        functions.append(asyncio.ensure_future(microphone()))
        if live_json:
            print('{"msg": "Ready."}')
        else:
            print("🟢 Ready.")
        await asyncio.gather(*functions)


def main():
    """Entrypoint for the example."""
    # Parse the command-line arguments.
    try:
        asyncio.run(run(key, "mic", "text"))
    except websockets.exceptions.InvalidStatusCode as e:
        print(f'🔴 ERROR: Could not connect to Deepgram! {e.headers.get("dg-error")}')
        print(
            f'🔴 Please contact Deepgram Support ([email protected]) with request ID {e.headers.get("dg-request-id")}'
        )
        return
    except websockets.exceptions.ConnectionClosedError as e:
        error_description = f"Unknown websocket error."
        print(
            f"🔴 ERROR: Deepgram connection unexpectedly closed with code {e.code} and payload {e.reason}"
        )
        # Map Deepgram's close reasons to human-readable explanations.
        if e.reason == "DATA-0000":
            error_description = "The payload cannot be decoded as audio. It is either not audio data or is a codec unsupported by Deepgram."
        elif e.reason == "NET-0000":
            error_description = "The service has not transmitted a Text frame to the client within the timeout window. This may indicate an issue internally in Deepgram's systems or could be due to Deepgram not receiving enough audio data to transcribe a frame."
        elif e.reason == "NET-0001":
            error_description = "The service has not received a Binary frame from the client within the timeout window. This may indicate an internal issue in Deepgram's systems, the client's systems, or the network connecting them."
        print(f"🔴 {error_description}")
        # TODO: update with link to streaming troubleshooting page once available
        # print(f'🔴 Refer to our troubleshooting suggestions: ')
        print(
            f"🔴 Please contact Deepgram Support ([email protected]) with the request ID listed above."
        )
        return
    except websockets.exceptions.ConnectionClosedOK:
        return
    except Exception as e:
        print(f"🔴 ERROR: Something went wrong! {e}")
        save_info()
        return


if __name__ == "__main__":
    sys.exit(main() or 0)
The Python script sends the microphone stream to Deepgram and prints out the JSON output. The Emacs Lisp code starts an asynchronous process and reads the JSON output, displaying the transcript and calculating the WPM based on the words. run.sh just loads the venv for this project (requirements.txt based on the streaming test suite) and then runs app.py, since some of the Python library versions conflict with other things I want to experiment with.
I also added
my-live-speech-wpm-string to my mode-line-format manually using
Customize, since I wanted it displayed on the left side instead of
getting lost when I turn keycast-mode on.
I'm still a little anxious about accidentally leaving a process
running, so I check with ps aux | grep python3. Eventually I'll
figure out how to make sure everything gets properly stopped when I'm
done.
Anyway, there it is!
(defun my-live-speech-display-in-speech-buffer (recognition-results)
  "Append text from RECOGNITION-RESULTS to `my-live-speech-buffer'.
RECOGNITION-RESULTS is an alist that may contain `msg' and/or
`transcript' entries; each is inserted on its own line.  If point was
at the end of the buffer, keep it there and scroll any window showing
the buffer; otherwise restore point."
  (with-current-buffer (get-buffer-create my-live-speech-buffer)
    (let-alist recognition-results
      (let* ((pos (point))
             (at-end (eobp)))
        (goto-char (point-max))
        (unless (eolp) (insert "\n"))
        (when .msg (insert .msg "\n"))
        (when .transcript (insert .transcript "\n"))
        ;; scroll to the bottom if being displayed
        (if at-end
            (when (get-buffer-window (current-buffer))
              (set-window-point (get-buffer-window (current-buffer)) (point)))
          (goto-char pos))))))

(defun my-live-speech-toggle-heading ()
  "Toggle a line as a heading.
Displays the live speech buffer, uses avy to jump to the chosen line
in that window only, then calls `org-toggle-heading'."
  (interactive)
  (with-current-buffer (get-buffer my-live-speech-buffer)
    (display-buffer (current-buffer))
    (with-selected-window (get-buffer-window (get-buffer my-live-speech-buffer))
      ;; Restrict avy to this window so the line choice is unambiguous.
      (let ((avy-all-windows nil))
        (avy-goto-line 1))
      (org-toggle-heading 1))))

(defun my-live-speech-cycle-visibility ()
  "Get a quick overview.
Toggles the live speech buffer between showing everything and showing
only the headings, mimicking `org-cycle''s global states and running
the same hooks."
  (interactive)
  (with-current-buffer (get-buffer my-live-speech-buffer)
    (display-buffer (current-buffer))
    (if (eq org-cycle-global-status 'contents)
        (progn
          (run-hook-with-args 'org-cycle-pre-hook 'all)
          (org-fold-show-all '(headings blocks))
          (setq org-cycle-global-status 'all)
          (run-hook-with-args 'org-cycle-hook 'all))
      (run-hook-with-args 'org-cycle-pre-hook 'contents)
      (org-cycle-content)
      (setq org-cycle-global-status 'contents)
      (run-hook-with-args 'org-cycle-hook 'contents))))
(defvar my-live-speech-wpm-window-seconds 15
  "How many seconds to calculate WPM for.")
(defvar my-live-speech-recent-words nil
  "Words spoken in the last `my-live-speech-wpm-window-seconds' seconds.")
(defvar my-live-speech-wpm nil "Current WPM.")
(defvar my-live-speech-wpm-colors
  ;; haven't figured out how to make these work yet
  '((180 :foreground "red")
    (170 :foreground "yellow")
    (160 :foreground "green"))
  "List of (THRESHOLD . FACE-PROPERTIES); the first threshold the
current WPM exceeds determines the face.")
(defvar my-live-speech-wpm-string nil
  "Add this somewhere in `mode-line-format'.")

(defun my-live-speech-wpm-string ()
  "Return `my-live-speech-wpm' formatted for the mode line.
The text is propertized with the face from the first matching entry
in `my-live-speech-wpm-colors', if any."
  (propertize (format "%d WPM " my-live-speech-wpm)
              'face
              (cdr (seq-find (lambda (row) (> my-live-speech-wpm (car row)))
                             my-live-speech-wpm-colors))))

(defun my-live-speech-display-wpm (recognition-results)
  "Update the words-per-minute state from RECOGNITION-RESULTS.
RECOGNITION-RESULTS is an alist whose `words' entry is a vector of
word alists with `start' and `end' times in seconds.  Maintains a
sliding window of recent words and refreshes
`my-live-speech-wpm-string' for the mode line."
  (let-alist recognition-results
    (when .words
      ;; calculate WPM
      (setq my-live-speech-recent-words
            (append my-live-speech-recent-words .words nil))
      ;; Drop words that started before the sliding window, anchored at
      ;; the end time of the most recent word.
      (let ((threshold (- (assoc-default 'end (aref .words (1- (length .words))))
                          my-live-speech-wpm-window-seconds)))
        (setq my-live-speech-recent-words
              (seq-filter (lambda (o) (>= (assoc-default 'start o) threshold))
                          my-live-speech-recent-words))
        ;; WPM = word count / elapsed minutes within the window.
        (setq my-live-speech-wpm
              (/ (length my-live-speech-recent-words)
                 (/ (- (assoc-default 'end (aref .words (1- (length .words))))
                       (assoc-default 'start (car my-live-speech-recent-words)))
                    60.0)))
        (setq my-live-speech-wpm-string (my-live-speech-wpm-string))))))
(defvar my-live-speech-etherpad-id nil
  "ID of the Etherpad to append transcripts to, or nil to disable.")

(defun my-live-speech-append-to-etherpad (recognition-results)
  "Append the transcript from RECOGNITION-RESULTS to the Etherpad.
Does nothing unless `my-live-speech-etherpad-id' is non-nil."
  (when my-live-speech-etherpad-id
    (let ((text (assoc-default 'transcript recognition-results)))
      (emacsconf-pad-append-text my-live-speech-etherpad-id
                                 (concat " " text)))))
You can e-mail me at [email protected].
]]>