]> code.delx.au - gnu-emacs-elpa/blob - packages/transcribe/transcribe.el
Add 'packages/compact-docstrings/' from commit '7ada669605c4e2a9a00fa6d03da7176f2c6e3297'
[gnu-emacs-elpa] / packages / transcribe / transcribe.el
1 ;;; transcribe.el --- Package for audio transcriptions
2
3 ;; Copyright 2014-2016 Free Software Foundation, Inc.
4
5 ;; Author: David Gonzalez Gandara <dggandara@member.fsf.org>
6 ;; Version: 1.5.0
7
8 ;; This program is free software: you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation, either version 3 of the License, or
11 ;; (at your option) any later version.
12 ;;
13 ;; This program is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
17 ;;
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21 ;;; Commentary:
22
23 ;; REQUIRES:
24 ;; -----------------------------
25 ;; This module works without any requires, but in order to use the audio
26 ;; functions, you need to install the Emacs package "emms", by Joe Drew,
27 ;; and the external program "mpg321", by Jorgen Schafer and Ulrik Jensen,
28 ;; both under GPL licenses.
29 ;;
30 ;; USAGE:
31 ;; -------------------------
32 ;; Transcribe is a tool to make audio transcriptions for discourse analysis
33 ;; in the classroom.
34 ;; It allows the transcriber to control the audio easily while typing, as well as
35 ;; automate the insertion of xml tags, in case the transcription protocol
36 ;; includes them.
37 ;; The analysis functions will search for a specific structure
38 ;; of episodes that can be automatically added with the macro NewEpisode.
39 ;; The function expects the speech acts to be transcribed inside a turn xml
40 ;; tag with the identifier of the speaker with optional move attribute.
41 ;; Each speech act is expected inside a <l1> or <l2> tag, depending
42 ;; on the language used by the person. The attributes expected are the
43 ;; number of clauses that form the utterance, the number of errors the
44 ;; transcriber observes, and the function of the speech act. The parser will
45 ;; work even if some attributes are missing.
46 ;;
47 ;;
48 ;; AUDIO COMMANDS
49 ;; ------------------------------
50 ;; C-x C-p ------> Play audio file. You will be prompted for the name
51 ;; of the file. The recommended format is mp2.
52 ;; <f5> ---------> Pause or play audio.
53 ;; C-x <right> --> seek audio 10 seconds forward.
54 ;; C-x <left> ---> seek audio 10 seconds backward.
55 ;; <f8> ---------> seek interactively: positive seconds go forward and
56 ;; negative seconds go backward
57 ;;
58 ;; XML TAGGING COMMANDS
59 ;; --------------------------------------------------
60 ;; C-x C-n ------> Create new episode structure. This is useful in case your
61 ;; xml file structure requires it.
62 ;; <f2> ---------> Interactively insert a move attribute in a turn (person) tag
63 ;; <f3> ---------> Interactively insert a function attribute in a speech act
64 ;; (l1 or l2) tag.
65 ;; <f4> ---------> Interactively insert an attribute (any kind)
66 ;; <f9> ---------> Insert a custom tag. Edit the function to adapt to your needs.
67 ;; <f10> --------> Insert turn (person) tag. Inserts a move attribute.
68 ;; <f11> --------> Insert speech act tag in L1, with clauses, errors and function
69 ;; attributes.
70 ;; <f12> --------> Insert speech act tag in L2, with clauses, errors and function
71 ;; attributes.
72 ;;
73 ;; AUTOMATIC PARSING
74 ;; -----------------------------------------------------
75 ;; C-x C-a ------> Analyses the text for measurements of performance.
76
77 ;;; Code:
78
(require 'xml)
(require 'cl-lib)

;; (require 'emms-player-mpd)
;; (setq emms-player-mpd-server-name "localhost")
;; (setq emms-player-mpd-server-port "6600")

;; EMMS is optional: the tagging and analysis commands must still load
;; when it is not installed, so every EMMS feature is required with
;; NOERROR and configured only when it is actually available.  The
;; requires are deliberately not top-level forms, so byte-compiling this
;; file does not need EMMS either.
(defvar emms-player-list)
(declare-function emms-standard "emms-setup")
(declare-function emms-default-players "emms-setup")
(declare-function emms-mode-line "emms-mode-line")
(declare-function emms-playing-time "emms-playing-time")

(when (require 'emms-setup nil t)
  (emms-standard)
  (emms-default-players)
  (when (require 'emms-player-mpg321-remote nil t)
    ;; `add-to-list' keeps the player list free of duplicates when this
    ;; file is loaded more than once.
    (add-to-list 'emms-player-list 'emms-player-mpg321-remote))
  (when (require 'emms-mode-line nil t)
    (emms-mode-line 1))
  (when (require 'emms-playing-time nil t)
    (emms-playing-time 1)))
96
(defvar transcribe-function-list
  '("initiating" "responding" "control" "expressive" "interpersonal")
  "Completion candidates for the \"function\" attribute of a speech act.
The spellings must match the strings `transcribe-analyze' compares
against, otherwise the corresponding function is never counted.")

(defvar transcribe-move-list '("initiation" "response" "follow-up")
  "Completion candidates for the \"move\" attribute of a turn tag.")

(defvar transcribe-attribute-list '("clauses" "errors" "function" "move")
  "Completion candidates for attribute names in `transcribe-add-attribute'.")
100 ;; (append transcribe-attribute-list transcribe-function-list transcribe-move-list)
101
(defun transcribe-analyze-episode (episode person)
  "Run the external script analyze_episodes2.py on the visited file.
EPISODE and PERSON are passed to the script as its -e and -p options
and the file visited by the current buffer as its -i option.  The
script is looked up relative to `default-directory'.

Every argument is shell-quoted, so spaces or shell metacharacters in
EPISODE, PERSON or the file name cannot alter the command.  Signal a
`user-error' when the current buffer is not visiting a file.

The command `transcribe-analyze' implements the same role in Lisp."
  (interactive "sepisode: \nsperson:")
  (unless buffer-file-name
    (user-error "Current buffer is not visiting a file"))
  (shell-command
   (mapconcat #'identity
              (list (shell-quote-argument
                     (expand-file-name "analyze_episodes2.py"))
                    "-e" (shell-quote-argument episode)
                    "-p" (shell-quote-argument person)
                    "-i" (shell-quote-argument buffer-file-name))
              " ")))
108
(defun transcribe-raw-to-buffer ()
  "EXPERIMENTAL - Insert a raw rendering of the transcription.
Parse the current buffer as XML and walk the children of every
<transcription> element of every <episode>.  For each child element,
insert at point in the \"Raw Output\" buffer that buffer's current
line number, the element name followed by a colon, and the first
child of each nested element (or the text itself for bare strings).
Text found directly inside <transcription> is copied as is, preceded
by the line number.

The \"Raw Output\" buffer is created when it does not exist yet."
  (interactive)
  ;; Parse before switching buffers: the XML lives in the current buffer.
  (let ((episodes (xml-get-children
                   (car (xml-parse-region (point-min) (point-max)))
                   'episode))
        (output (get-buffer-create "Raw Output")))
    (dolist (episode episodes)
      (dolist (turn (xml-get-children episode 'transcription))
        (dolist (intervention (xml-node-children turn))
          (with-current-buffer output
            (if (listp intervention)
                (progn
                  ;; `line-number-at-pos' refers to the output buffer here.
                  (insert (format "%s\t" (line-number-at-pos)))
                  (insert (format "%s:\t" (car intervention)))
                  (dolist (utterance (nthcdr 2 intervention))
                    (if (listp utterance)
                        (insert (format "%s " (nth 2 utterance)))
                      (insert (format "%s" utterance)))))
              (insert (format "%s" (line-number-at-pos)))
              (insert (format "%s" intervention)))))))))
138
(defun transcribe-analyze (episodenumber personid)
  "Report speech statistics for PERSONID in episode EPISODENUMBER.
Parse the current buffer as XML and, for every <episode> whose <number>
text equals EPISODENUMBER, visit the children of its <transcription>
elements named PERSONID and count the <l2> and <l1> speech acts inside
them.  A speech act counts as one AS-unit when it has content.

Print to `standard-output' the L2 and L1 AS-units per second, the L2
clauses and errors per AS-unit, and the number of L2 speech acts of
each function per AS-unit.  Also insert one CSV row at point in the
\"Statistics Output\" buffer, creating that buffer when needed.

A speech act without a \"clauses\" or \"errors\" attribute adds zero
to the corresponding total.  Signal a `user-error' when no matching
episode with a <duration> is found."
  (interactive "sepisodenumber: \nspersonid:")
  (let ((episodes (xml-get-children
                   (car (xml-parse-region (point-min) (point-max)))
                   'episode))
        (person (intern personid))
        ;; Counters are floats so that the ratios below use float
        ;; division (and yield NaN/INF instead of an arith-error when a
        ;; denominator is zero).
        (asunitsl2 0.0)
        (asunitsl1 0.0)
        (initiating 0.0)
        (responding 0.0)
        (control 0.0)
        (expressive 0.0)
        (interpersonal 0.0)
        ;; L1 clauses are accumulated but not reported yet.
        (clausesl1 0.0)
        (clausesl2 0.0)
        (errorsl2 0.0)
        (duration nil)
        (role nil)
        (context nil)
        (demand nil))
    (dolist (episode episodes)
      (when (equal episodenumber
                   (nth 2 (car (xml-get-children episode 'number))))
        (setq duration (nth 2 (car (xml-get-children episode 'duration))))
        (dolist (task (xml-get-children episode 'task))
          (setq role (nth 2 (car (xml-get-children task 'role)))
                context (nth 2 (car (xml-get-children task 'context)))
                demand (nth 2 (car (xml-get-children task 'demand)))))
        (dolist (turn (xml-get-children episode 'transcription))
          (dolist (intervention (xml-get-children turn person))
            (dolist (l2turn (xml-get-children intervention 'l2))
              (let* ((attrs (xml-node-attributes l2turn))
                     (function (cdr (assq 'function attrs))))
                (cond
                 ((equal function "initiating")
                  (setq initiating (1+ initiating)))
                 ((equal function "responding")
                  (setq responding (1+ responding)))
                 ((equal function "control")
                  (setq control (1+ control)))
                 ((equal function "expressive")
                  (setq expressive (1+ expressive)))
                 ((equal function "interpersonal")
                  (setq interpersonal (1+ interpersonal))))
                ;; A missing attribute counts as zero instead of
                ;; handing nil to `string-to-number'.
                (setq clausesl2
                      (+ clausesl2
                         (string-to-number
                          (or (cdr (assq 'clauses attrs)) "0")))
                      errorsl2
                      (+ errorsl2
                         (string-to-number
                          (or (cdr (assq 'errors attrs)) "0"))))
                (when (nth 2 l2turn)
                  (setq asunitsl2 (1+ asunitsl2)))))
            (dolist (l1turn (xml-get-children intervention 'l1))
              ;; Look the attribute up by name, as for L2, rather than
              ;; trusting that it is the first one on the tag.
              (setq clausesl1
                    (+ clausesl1
                       (string-to-number
                        (or (cdr (assq 'clauses
                                       (xml-node-attributes l1turn)))
                            "0"))))
              (when (nth 2 l1turn)
                (setq asunitsl1 (1+ asunitsl1))))))))
    (unless duration
      (user-error "No episode %s with a duration in this buffer"
                  episodenumber))
    (let* ((seconds (string-to-number duration))
           (asunitspersecondl2 (/ asunitsl2 seconds))
           (clausesperasunitl2 (/ clausesl2 asunitsl2))
           (errorsperasunitl2 (/ errorsl2 asunitsl2))
           (asunitspersecondl1 (/ asunitsl1 seconds))
           (initiatingperasunitl2 (/ initiating asunitsl2))
           (respondingperasunitl2 (/ responding asunitsl2))
           (controlperasunitl2 (/ control asunitsl2))
           (expressiveperasunitl2 (/ expressive asunitsl2))
           (interpersonalperasunitl2 (/ interpersonal asunitsl2)))
      (princ (format "episode: %s, duration: %s, person: %s\n" episodenumber duration personid))
      (with-current-buffer (get-buffer-create "Statistics Output")
        (insert (format "%s,%s,%s,0,0,%s,%s,%s,%s,%s,QUAN-L2,segmented,aux,level,subject,yearofclil,month\n" personid episodenumber duration role context demand asunitspersecondl2 asunitspersecondl1)))
      (princ (format "L2(Asunits/second): %s, L2(clauses/Asunit): %s, L2(errors/Asunit):%s, L1(Asunits/second): %s\n"
                     asunitspersecondl2 clausesperasunitl2 errorsperasunitl2 asunitspersecondl1))
      (princ (format "Functions/unit: Initiating: %s, Responding: %s, Control: %s, Expressive: %s, Interpersonal: %s" initiatingperasunitl2 respondingperasunitl2 controlperasunitl2 expressiveperasunitl2 interpersonalperasunitl2)))))
263
(defun transcribe-analyze-all ()
  "Analyze every participant of every episode in the current buffer.
Erase the \"Statistics Output\" buffer (creating it when needed),
insert the CSV header row, and then call `transcribe-analyze' once for
each whitespace-separated name in each episode's <participants>
element.  Episodes without participants text are skipped.

All previous contents of the \"Statistics Output\" buffer are lost.
The resulting data can be saved to a file and passed to R for
statistical analysis."
  (interactive)
  (let ((episodes (xml-get-children
                   (car (xml-parse-region (point-min) (point-max)))
                   'episode)))
    (with-current-buffer (get-buffer-create "Statistics Output")
      (erase-buffer)
      (insert "person,episode,duration,C-UNITS(L2),C-UNITS(L1),role,context,demand,QUAN-L2,QUAN-L1,QUAL-L2,segmented,aux,level,subjects,yearofCLIL,month\n"))
    (dolist (episode episodes)
      (let ((number (nth 2 (car (xml-get-children episode 'number))))
            (participants
             (nth 2 (car (xml-get-children episode 'participants)))))
        ;; A missing or empty <participants> yields nil here, which
        ;; `split-string' would reject.
        (when (stringp participants)
          (dolist (participant (split-string participants))
            (transcribe-analyze number participant)))))))
286
287
(defun transcribe-xml-tag-person (xmltag)
  "Insert a speaker tag XMLTAG with an empty \"move\" attribute.
The text inserted is <XMLTAG move=\"\"></XMLTAG>; point is left
between the opening and the closing tag.

Point is moved back by the number of characters in the closing tag,
not by its display width, so tag names containing double-width
characters are handled correctly."
  (interactive "stag:")
  (let ((closing (format "</%s>" xmltag)))
    (insert (format "<%s move=\"\">" xmltag) closing)
    (backward-char (length closing))))
294
(defun transcribe-xml-tag (xmltag)
  "Insert an empty custom tag XMLTAG and place point inside it.
The text inserted is <XMLTAG></XMLTAG>; point is left between the
opening and the closing tag.

Point is moved back by the number of characters in the closing tag,
not by its display width, so tag names containing double-width
characters are handled correctly."
  (interactive "stag:")
  (let ((closing (format "</%s>" xmltag)))
    (insert (format "<%s>" xmltag) closing)
    (backward-char (length closing))))
301
(defun transcribe-region-xml-tag (xmltag)
  "Wrap the region in an opening and a closing XMLTAG.
<XMLTAG> is inserted at the beginning of the region and </XMLTAG> at
its end; point is left after the closing tag."
  (interactive "stag:")
  (let ((beginning (region-beginning))
        ;; Track the end with a marker: inserting the opening tag shifts
        ;; the text, so a plain position would point inside the region.
        ;; The marker advances on insertion so that an empty region
        ;; still gets the closing tag after the opening one.
        (end (copy-marker (region-end) t)))
    (goto-char beginning)
    (insert (format "<%s>" xmltag))
    (goto-char end)
    (insert (format "</%s>" xmltag))
    (set-marker end nil)))
311
(defun transcribe-add-attribute (att val)
  "Insert the xml attribute ATT with value VAL at point.
The text inserted is ATT=\"VAL\".  Interactively, ATT is read with
completion over `transcribe-attribute-list' and VAL is read as a
plain string."
  (interactive
   (list (completing-read "attribute name:" transcribe-attribute-list)
         (read-string "value:")))
  (insert (format "%s=\"%s\"" att val)))
316
(defun transcribe-add-attribute-function (val)
  "Insert the xml attribute \"function\" with value VAL at point.
Interactively, VAL is read with completion over
`transcribe-function-list'."
  (interactive
   (list (completing-read "function name:" transcribe-function-list)))
  (let ((attribute (format "function=\"%s\"" val)))
    (insert attribute)))
321
(defun transcribe-add-attribute-move (val)
  "Insert the xml attribute \"move\" with value VAL at point.
Interactively, VAL is read with completion over
`transcribe-move-list'."
  (interactive
   (list (completing-read "move name:" transcribe-move-list)))
  (let ((attribute (format "move=\"%s\"" val)))
    (insert attribute)))
326
(defun transcribe-xml-tag-l1 ()
  "Insert an <l1> speech act tag and place point inside it.
The tag carries clauses=\"1\", errors=\"0\" and an empty function
attribute; point is left just before the closing </l1>."
  (interactive)
  (let ((closing "</l1>"))
    (insert "<l1 clauses=\"1\" errors=\"0\" function=\"\">" closing)
    (backward-char (length closing))))
332
(defun transcribe-xml-tag-l2 ()
  "Insert an <l2> speech act tag and place point inside it.
The tag carries clauses=\"1\", errors=\"0\" and an empty function
attribute; point is left just before the closing </l2>."
  (interactive)
  (let ((closing "</l2>"))
    (insert "<l2 clauses=\"1\" errors=\"0\" function=\"\">" closing)
    (backward-char (length closing))))
338
(defun transcribe-xml-tag-break (xmltag)
  "Split the current XMLTAG unit in two at point.
Insert a closing XMLTAG immediately followed by an opening one, that
is </XMLTAG><XMLTAG>, leaving point after the inserted text."
  (interactive "stag:")
  (let ((separator (format "</%s><%s>" xmltag xmltag)))
    (insert separator)))
343
(defun transcribe-display-audio-info ()
  "Call `emms-player-mpg321-remote-proc' and start mpg321 in remote mode.
The command \"/usr/bin/mpg321 -R -\" is run through `shell-command';
its trailing \"&\" makes the shell command asynchronous."
  (interactive)
  (emms-player-mpg321-remote-proc)
  (let ((remote-command "/usr/bin/mpg321 -R - &"))
    (shell-command remote-command)))
348
349
;; Keyboard macro that types a new, empty episode structure at point.
;; Being a keyboard macro, each \n, \t and \r below is executed as the
;; corresponding key (C-j, TAB, RET), so the exact result depends on the
;; bindings of the current major mode.
(fset 'NewEpisode
      (concat "<episode>\n"
              "<number>DATE-NUMBER</number>\n"
              "<duration></duration>\n"
              "<comment></comment>\n"
              "<subject>Subject (level)</subject>\n"
              "<participants></participants>\n"
              "<task>\n"
              "\t<role>low or high</role>\n"
              "<context>low or high</context>\n"
              "<demand>low or high</demand>\r"
              "</task>\n"
              "<auxiliar>Yes/no</auxiliar>\n"
              "<transcription>\n"
              "</transcription>\n"
              "</episode>"))
352
353
(defvar transcribe-mode-map
  (let ((map (make-sparse-keymap))
        ;; (KEY . COMMAND) pairs, bound in this order.
        (bindings
         '(("C-x C-p" . emms-play-file)
           ("C-x C-a" . transcribe-analyze)
           ("C-x C-n" . NewEpisode)
           ("C-x <down>" . emms-stop)
           ("C-x <right>" . emms-seek-forward)
           ("C-x <left>" . emms-seek-backward)
           ("<f2>" . transcribe-add-attribute-move)
           ("<f3>" . transcribe-add-attribute-function)
           ("<f4>" . transcribe-add-attribute)
           ("<f5>" . emms-pause)
           ("<f8>" . emms-seek)
           ("<f9>" . transcribe-xml-tag)
           ("<f10>" . transcribe-xml-tag-person)
           ("<f11>" . transcribe-xml-tag-l1)
           ("<f12>" . transcribe-xml-tag-l2))))
    (dolist (binding bindings)
      (define-key map (kbd (car binding)) (cdr binding)))
    map)
  "Keymap for Transcribe minor mode.")
373
374
;; Menu-bar entry for the mode.  Every item must name a command that
;; exists: "Analyze all" runs `transcribe-analyze-all'.
(easy-menu-define transcribe-mode-menu transcribe-mode-map
  "Menu for Transcribe mode"
  '("Transcribe"
    ["Raw Output" transcribe-raw-to-buffer]
    "---"
    ["Analyze" transcribe-analyze]
    ["Analyze all" transcribe-analyze-all]
    "---"
    ["Add transcription header" NewEpisode]
    ["Add move attribute" transcribe-add-attribute-move]
    ["Add function attribute" transcribe-add-attribute-function]
    ["Add L1 intervention" transcribe-xml-tag-l1]
    ["Add L2 intervention" transcribe-xml-tag-l2]
    ["Add move" transcribe-xml-tag-person]
    "---"
    ["Play audio file" emms-play-file]
    ))
392
393
394 ;;;###autoload
(define-minor-mode transcribe-mode
  "Toggle Transcribe mode in the current buffer.
Transcribe mode provides the key bindings and menu of
`transcribe-mode-map'.

Enabling the mode makes sure the \"Statistics Output\" and
\"Raw Output\" buffers exist.  Existing buffers are reused, so
toggling the mode repeatedly does not pile up numbered copies."
  :init-value nil
  :lighter " Trans"
  :keymap transcribe-mode-map
  (when transcribe-mode
    (get-buffer-create "Statistics Output")
    (get-buffer-create "Raw Output"))
  ;; TODO: save the students present in transcription in list so that we can use that list for transcribe-analyze-all
  )
410
411 (provide 'transcribe)
412
413 ;;; transcribe.el ends here