]> code.delx.au - gnu-emacs-elpa/blob - packages/transcribe/transcribe.el
Merge commit 'c0a1e24ef39e2b0f388135c2ed8f8b419346337c'
[gnu-emacs-elpa] / packages / transcribe / transcribe.el
1 ;;; transcribe.el --- Package for audio transcriptions
2
3 ;; Copyright 2014-2016 Free Software Foundation, Inc.
4
5 ;; Author: David Gonzalez Gandara <dggandara@member.fsf.org>
6 ;; Version: 1.5.0
7
8 ;; This program is free software: you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation, either version 3 of the License, or
11 ;; (at your option) any later version.
12 ;;
13 ;; This program is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
17 ;;
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21 ;;; Commentary:
22
23 ;; REQUIRES:
24 ;; -----------------------------
25 ;; This module works without any requires, but in order to use the audio
26 ;; functions, you need to install the Emacs package "emms", by Joe Drew,
27 ;; and the external program "mpg321", by Jorgen Schafer and Ulrik Jensen,
28 ;; both under GPL licenses.
29 ;;
30 ;; USAGE:
31 ;; -------------------------
32 ;; Transcribe is a tool to make audio transcriptions for discourse analysis
33 ;; in the classroom.
34 ;; It allows the transcriber to control the audio easily while typing, as well as
35 ;; to automate the insertion of xml tags, in case the transcription protocol
36 ;; includes them.
37 ;; The analysis functions will search for a specific structure
38 ;; of episodes that can be automatically added with the macro NewEpisode.
39 ;; The function expects the speech acts to be transcribed inside a turn xml
40 ;; tag with the identifier of the speaker with optional move attribute.
41 ;; Each speech act is expected inside a <l1> or <l2> tag, depending
42 ;; on the language used by the person. The attributes expected are the
43 ;; number of clauses that form the utterance, the number of errors the
44 ;; transcriber observes, and the function of the speech act. The parser will
45 ;; work even if some attributes are missing.
46 ;;
47 ;;
48 ;; AUDIO COMMANDS
49 ;; ------------------------------
50 ;; C-x C-p ------> Play audio file. You will be prompted for the name
51 ;; of the file. The recommended format is mp2.
52 ;; <f5> ---------> Pause or play audio.
53 ;; C-x <right> --> seek audio 10 seconds forward.
54 ;; C-x <left> ---> seek audio 10 seconds backward.
55 ;; <f8> ---------> seek interactively: positive seconds go forward and
56 ;; negative seconds go backward
57 ;;
58 ;; XML TAGGING COMMANDS
59 ;; --------------------------------------------------
60 ;; C-x C-n ------> Create new episode structure. This is useful in case your
61 ;; xml file structure requires it.
62 ;; <f2> ---------> Interactively insert a move attribute in a turn (person) tag
63 ;; <f3> ---------> Interactively insert a function attribute in a speech act
64 ;; (l1 or l2) tag.
65 ;; <f4> ---------> Interactively insert an attribute (any kind)
66 ;; <f9> ---------> Insert a custom tag. Edit the function to adapt to your needs.
67 ;; <f10> --------> Insert turn (person) tag. Inserts a move attribute.
68 ;; <f11> --------> Insert speech act tag in L1, with clauses, errors and function
69 ;; attributes.
70 ;; <f12> --------> Insert speech act tag in L2, with clauses, errors and function
71 ;; attributes.
72 ;;
73 ;; AUTOMATIC PARSING
74 ;; -----------------------------------------------------
75 ;; C-x C-a ------> Analyses the text for measurements of performance.
76
77 ;;; Code:
78
79 (require 'xml)
80
81 ;; (if t (require 'emms-setup))
82 ;; (require 'emms-player-mpd)
83 ;; (setq emms-player-mpd-server-name "localhost")
84 ;; (setq emms-player-mpd-server-port "6600")
85
;; EMMS is an optional dependency: the tagging and analysis commands do not
;; need it, so the audio back end is only configured when EMMS can be
;; loaded.  Requiring the libraries softly (third argument t) keeps a
;; missing EMMS installation from aborting the load of this file, and the
;; non-top-level `require' forms are not evaluated by the byte-compiler.
(defvar emms-player-list)

(when (require 'emms-setup nil t)
  (emms-standard)
  (emms-default-players)
  (when (require 'emms-player-mpg321-remote nil t)
    (push 'emms-player-mpg321-remote emms-player-list))
  (when (require 'emms-mode-line nil t)
    (emms-mode-line 1))
  (when (require 'emms-playing-time nil t)
    (emms-playing-time 1)))
96
(defvar transcribe-function-list
  '("initiating" "responding" "control" "expressive" "interpersonal")
  "Completion candidates for the \"function\" attribute of a speech act.
The spellings must match the values counted by `transcribe-analyze'.")

(defvar transcribe-move-list '("initiation" "response" "follow-up")
  "Completion candidates for the \"move\" attribute of a turn tag.")

(defvar transcribe-attribute-list '("clauses" "errors" "function" "move")
  "Completion candidates for the attribute names of the xml tags.")
100 ;; (append transcribe-attribute-list transcribe-function-list transcribe-move-list)
101
(defun transcribe-analyze-episode (episode person)
  "Run the external script analyze_episodes2.py on the visited file.
EPISODE and PERSON are passed to the script as its -e and -p options,
and the file visited by the current buffer as its -i option.  The
script name is expanded relative to `default-directory'.  Every
argument is shell-quoted, so names containing spaces or shell
metacharacters reach the script unchanged.  Signal a `user-error' if
the current buffer is not visiting a file.

The command `transcribe-analyze' implements this role natively now."
  (interactive "sepisode: \nsperson:")
  (unless buffer-file-name
    (user-error "Current buffer is not visiting a file"))
  (shell-command
   (mapconcat #'identity
              (list (shell-quote-argument
                     (expand-file-name "analyze_episodes2.py"))
                    "-e" (shell-quote-argument episode)
                    "-p" (shell-quote-argument person)
                    "-i" (shell-quote-argument buffer-file-name))
              " ")))
108
(defun transcribe-raw-to-buffer ()
  "EXPERIMENTAL - Convert the tagged transcription to a raw transcription.
Parse the current buffer as XML and, for each element found inside the
<transcription> of each <episode>, insert one line of the form
\"NAME: utterance utterance ...\" in the buffer \"Raw Output\", where
NAME is the element name and the utterances are the texts of its child
elements.  The output buffer is created if it does not exist yet."
  (interactive)
  (let ((root (car (xml-parse-region (point-min) (point-max))))
        (output (get-buffer-create "Raw Output")))
    (dolist (episode (xml-get-children root 'episode))
      (dolist (transcription (xml-get-children episode 'transcription))
        (dolist (intervention (xml-node-children transcription))
          ;; String children are the text between tags; only element
          ;; nodes, which are lists, are reported.
          (when (listp intervention)
            (with-current-buffer output
              (insert (format "%s: " (car intervention)))
              (dolist (utterance (nthcdr 2 intervention))
                (when (listp utterance)
                  (let ((text (nth 2 utterance)))
                    ;; An empty tag has no text child; skip it instead
                    ;; of printing the string "nil".
                    (when text
                      (insert (format "%s " text))))))
              (insert "\n"))))))))
130
(defun transcribe--attribute-number (attrs name)
  "Return attribute NAME of the alist ATTRS as a number, or 0 if it is absent."
  (let ((value (cdr (assq name attrs))))
    (if (stringp value)
        (string-to-number value)
      0)))

(defun transcribe-analyze (episodenumber personid)
  "Report the speech statistics of PERSONID in episode EPISODENUMBER.
Both arguments are strings.  The current buffer is parsed as XML, and
for every episode whose <number> equals EPISODENUMBER the <l2> and <l1>
speech acts found inside the elements named PERSONID are counted.

A CSV row is inserted in the buffer \"Statistics Output\" (created if
it does not exist), and a summary is printed to `standard-output' with
the L2 and L1 AS-units per second and, for L2, the clauses, errors and
speech act functions per AS-unit.  A missing clauses, errors or
function attribute counts as zero.

Signal a `user-error' if no episode matches EPISODENUMBER or if the
matching episode has no <duration>."
  (interactive "sepisodenumber: \nspersonid:")
  (let ((root (car (xml-parse-region (point-min) (point-max))))
        (speaker (intern personid))
        (found nil)
        ;; Counters are floats so that the ratios below use float division.
        (asunitsl2 0.0)
        (asunitsl1 0.0)
        (clausesl2 0.0)
        (clausesl1 0.0)
        (errorsl2 0.0)
        (initiating 0.0)
        (responding 0.0)
        (control 0.0)
        (expressive 0.0)
        (interpersonal 0.0)
        (duration nil)
        (role nil)
        (context nil)
        (demand nil))

    (dolist (episode (xml-get-children root 'episode))
      (when (equal episodenumber
                   (nth 2 (car (xml-get-children episode 'number))))
        (setq found t
              duration (nth 2 (car (xml-get-children episode 'duration))))

        (dolist (task (xml-get-children episode 'task))
          (setq role (nth 2 (car (xml-get-children task 'role)))
                context (nth 2 (car (xml-get-children task 'context)))
                demand (nth 2 (car (xml-get-children task 'demand)))))

        (dolist (turn (xml-get-children episode 'transcription))
          (dolist (intervention (xml-get-children turn speaker))

            (dolist (l2turn (xml-get-children intervention 'l2))
              (let* ((attrs (nth 1 l2turn))
                     (act (cdr (assq 'function attrs))))
                (cond
                 ((equal act "initiating")
                  (setq initiating (1+ initiating)))
                 ((equal act "responding")
                  (setq responding (1+ responding)))
                 ((equal act "control")
                  (setq control (1+ control)))
                 ((equal act "expressive")
                  (setq expressive (1+ expressive)))
                 ((equal act "interpersonal")
                  (setq interpersonal (1+ interpersonal))))
                (setq clausesl2
                      (+ clausesl2
                         (transcribe--attribute-number attrs 'clauses))
                      errorsl2
                      (+ errorsl2
                         (transcribe--attribute-number attrs 'errors)))
                ;; Only speech acts that contain text count as AS-units.
                (when (nth 2 l2turn)
                  (setq asunitsl2 (1+ asunitsl2)))))

            (dolist (l1turn (xml-get-children intervention 'l1))
              ;; TODO: L1 clauses are accumulated but not reported yet.
              (setq clausesl1
                    (+ clausesl1
                       (transcribe--attribute-number (nth 1 l1turn)
                                                     'clauses)))
              (when (nth 2 l1turn)
                (setq asunitsl1 (1+ asunitsl1))))))))

    (unless found
      (user-error "No episode with number %s" episodenumber))
    (unless (stringp duration)
      (user-error "Episode %s has no duration" episodenumber))

    ;; Writing the raw interventions to a file is left to a different
    ;; function.
    (let* ((seconds (string-to-number duration))
           (asunitspersecondl2 (/ asunitsl2 seconds))
           (clausesperasunitl2 (/ clausesl2 asunitsl2))
           (errorsperasunitl2 (/ errorsl2 asunitsl2))
           (asunitspersecondl1 (/ asunitsl1 seconds))
           (initiatingperasunitl2 (/ initiating asunitsl2))
           (respondingperasunitl2 (/ responding asunitsl2))
           (controlperasunitl2 (/ control asunitsl2))
           (expressiveperasunitl2 (/ expressive asunitsl2))
           (interpersonalperasunitl2 (/ interpersonal asunitsl2)))
      (princ (format "episode: %s, duration: %s, person: %s\n"
                     episodenumber duration personid))
      (with-current-buffer (get-buffer-create "Statistics Output")
        (insert (format "%s,%s,%s,0,0,%s,%s,%s,%s,%s,QUAN-L2,segmented,aux,level,subject,yearofclil,month\n"
                        personid episodenumber duration role context demand
                        asunitspersecondl2 asunitspersecondl1)))
      (princ (format "L2(Asunits/second): %s, L2(clauses/Asunit): %s, L2(errors/Asunit):%s, L1(Asunits/second): %s\n"
                     asunitspersecondl2 clausesperasunitl2
                     errorsperasunitl2 asunitspersecondl1))
      (princ (format "Functions/unit: Initiating: %s, Responding: %s, Control: %s, Expressive: %s, Interpersonal: %s"
                     initiatingperasunitl2 respondingperasunitl2
                     controlperasunitl2 expressiveperasunitl2
                     interpersonalperasunitl2)))))
255
(defun transcribe-analyze-all ()
  "Analyze every participant of every episode in the current buffer.
The buffer \"Statistics Output\" is created if necessary and emptied,
so all its previous data is lost.  A CSV header is inserted in it, and
then `transcribe-analyze' is called once for each whitespace-separated
name in the <participants> element of each episode.  Episodes without
a <participants> element are skipped.  The data in the buffer can be
saved to a file and passed to R for statistical analysis."
  (interactive)
  (let ((root (car (xml-parse-region (point-min) (point-max)))))
    (with-current-buffer (get-buffer-create "Statistics Output")
      (erase-buffer)
      (insert "person,episode,duration,C-UNITS(L2),C-UNITS(L1),role,context,demand,QUAN-L2,QUAN-L1,QUAL-L2,segmented,aux,level,subjects,yearofCLIL,month\n"))
    (dolist (episode (xml-get-children root 'episode))
      (let ((number (nth 2 (car (xml-get-children episode 'number))))
            (names (nth 2 (car (xml-get-children episode 'participants)))))
        ;; `split-string' signals an error when given nil, which is what
        ;; a missing <participants> element yields.
        (when (stringp names)
          (dolist (participant (split-string names))
            (transcribe-analyze number participant)))))))
278
279
(defun transcribe-xml-tag-person (xmltag)
  "Insert a speaker tag pair named XMLTAG with an empty move attribute.
Point is left between the opening and the closing tag."
  (interactive "stag:")
  (insert (format "<%s move=\"\">" xmltag))
  ;; Insert the closing tag after point without moving point.  Stepping
  ;; back by `string-width' columns would misplace point for tag names
  ;; containing double-width characters.
  (save-excursion
    (insert (format "</%s>" xmltag))))
286
(defun transcribe-xml-tag (xmltag)
  "Insert a custom tag pair named XMLTAG.
Point is left between the opening and the closing tag."
  (interactive "stag:")
  (insert (format "<%s>" xmltag))
  ;; Insert the closing tag after point without moving point.  Stepping
  ;; back by `string-width' columns would misplace point for tag names
  ;; containing double-width characters.
  (save-excursion
    (insert (format "</%s>" xmltag))))
293
(defun transcribe-region-xml-tag (xmltag)
  "Wrap the region in an opening and a closing tag named XMLTAG.
Point is left after the closing tag."
  (interactive "stag:")
  (let ((beginning (region-beginning))
        ;; Inserting the opening tag shifts the end of the region, so
        ;; track it with a marker instead of a fixed position.  The
        ;; marker advances on insertion at its own position, which keeps
        ;; the tags in the right order when the region is empty.
        (end (copy-marker (region-end) t)))
    (goto-char beginning)
    (insert (format "<%s>" xmltag))
    (goto-char end)
    (insert (format "</%s>" xmltag))
    (set-marker end nil)))
303
(defun transcribe-add-attribute (att val)
  "Insert at point an xml attribute named ATT with the value VAL.
Interactively, ATT is read with completion over
`transcribe-attribute-list' and VAL is read as a plain string."
  (interactive
   (let* ((name (completing-read "attibute name:" transcribe-attribute-list))
          (value (read-string "value:")))
     (list name value)))
  (let ((attribute (format "%s=\"%s\"" att val)))
    (insert attribute)))
308
(defun transcribe-add-attribute-function (val)
  "Insert at point the xml attribute \"function\" with the value VAL.
Interactively, VAL is read with completion over
`transcribe-function-list'."
  (interactive
   (let ((choice (completing-read "function name:" transcribe-function-list)))
     (list choice)))
  (let ((attribute (format "function=\"%s\"" val)))
    (insert attribute)))
313
(defun transcribe-add-attribute-move (val)
  "Insert at point the xml attribute \"move\" with the value VAL.
Interactively, VAL is read with completion over `transcribe-move-list'."
  (interactive
   (let ((choice (completing-read "move name:" transcribe-move-list)))
     (list choice)))
  (let ((attribute (format "move=\"%s\"" val)))
    (insert attribute)))
318
(defun transcribe-xml-tag-l1 ()
  "Insert an l1 speech act tag pair and place point inside it.
The opening tag carries the attributes clauses=\"1\", errors=\"0\" and
an empty function attribute."
  (interactive)
  (let ((closing "</l1>"))
    (insert "<l1 clauses=\"1\" errors=\"0\" function=\"\">" closing)
    ;; Step back over the closing tag so typing goes between the tags.
    (backward-char (length closing))))
324
(defun transcribe-xml-tag-l2 ()
  "Insert an l2 speech act tag pair and place point inside it.
The opening tag carries the attributes clauses=\"1\", errors=\"0\" and
an empty function attribute."
  (interactive)
  (let ((closing "</l2>"))
    (insert "<l2 clauses=\"1\" errors=\"0\" function=\"\">" closing)
    ;; Step back over the closing tag so typing goes between the tags.
    (backward-char (length closing))))
330
(defun transcribe-xml-tag-break (xmltag)
  "Break a unit in two at point.
Insert a closing tag immediately followed by an opening tag, both
named XMLTAG."
  (interactive "stag:")
  (let ((separator (format "</%s><%s>" xmltag xmltag)))
    (insert separator)))
335
(defun transcribe-display-audio-info ()
  "Call `emms-player-mpg321-remote-proc', then run mpg321 in remote mode.
The command line \"/usr/bin/mpg321 -R - &\" is passed to
`shell-command'."
  (interactive)
  (emms-player-mpg321-remote-proc)
  (let ((command "/usr/bin/mpg321 -R - &"))
    (shell-command command)))
340
341
;; Keyboard macro that inserts a new, empty episode structure.
;; The participants element must be closed with "</participants>": the
;; backslash in "<\participants>" is dropped by the Lisp reader, which
;; left a second opening tag and made the inserted xml ill-formed.
(fset 'NewEpisode
      "<episode>\n<number>DATE-NUMBER</number>\n<duration></duration>\n<comment></comment>\n<subject>Subject (level)</subject>\n<participants></participants>\n<task>\n\t<role>low or high</role>\n<context>low or high</context>\n<demand>low or high</demand>\r</task>\n<auxiliar>Yes/no</auxiliar>\n<transcription>\n</transcription>\n</episode>")
344
345
(defvar transcribe-mode-map
  (let ((map (make-sparse-keymap)))
    ;; Audio control.  No command named `transcribe-play-file' is defined
    ;; in this file, so the EMMS command that prompts for a file is bound
    ;; directly, like the other EMMS bindings below.
    (define-key map (kbd "C-x C-p") 'emms-play-file)
    (define-key map (kbd "C-x <down>") 'emms-stop)
    (define-key map (kbd "C-x <right>") 'emms-seek-forward)
    (define-key map (kbd "C-x <left>") 'emms-seek-backward)
    (define-key map (kbd "<f5>") 'emms-pause)
    (define-key map (kbd "<f8>") 'emms-seek)
    ;; Analysis and episode structure.
    (define-key map (kbd "C-x C-a") 'transcribe-analyze)
    (define-key map (kbd "C-x C-n") 'NewEpisode)
    ;; Attributes.
    (define-key map (kbd "<f2>") 'transcribe-add-attribute-move)
    (define-key map (kbd "<f3>") 'transcribe-add-attribute-function)
    (define-key map (kbd "<f4>") 'transcribe-add-attribute)
    ;; Tags.  The speech act commands are named `transcribe-xml-tag-l1'
    ;; and `transcribe-xml-tag-l2'; the shorter names bound previously
    ;; are not defined anywhere.
    (define-key map (kbd "<f9>") 'transcribe-xml-tag)
    (define-key map (kbd "<f10>") 'transcribe-xml-tag-person)
    (define-key map (kbd "<f11>") 'transcribe-xml-tag-l1)
    (define-key map (kbd "<f12>") 'transcribe-xml-tag-l2)
    map)
  "Keymap for Transcribe minor mode.")
365
366
;; Every entry must name a command that exists: "Analyze all" runs
;; `transcribe-analyze-all', the L1/L2 entries run the
;; `transcribe-xml-tag-l1' and `transcribe-xml-tag-l2' commands, and
;; "Play audio file" runs the EMMS command `emms-play-file'.
(easy-menu-define transcribe-mode-menu transcribe-mode-map
  "Menu for Transcribe mode"
  '("Transcribe"
    ["Raw Output" transcribe-raw-to-buffer]
    "---"
    ["Analyze" transcribe-analyze]
    ["Analyze all" transcribe-analyze-all]
    "---"
    ["Add transcription header" NewEpisode]
    ["Add move attribute" transcribe-add-attribute-move]
    ["Add function attribute" transcribe-add-attribute-function]
    ["Add L1 intervention" transcribe-xml-tag-l1]
    ["Add L2 intervention" transcribe-xml-tag-l2]
    ["Add move" transcribe-xml-tag-person]
    "---"
    ["Play audio file" emms-play-file]))
384
385
;;;###autoload
(define-minor-mode transcribe-mode
  "Toggle Transcribe mode, a minor mode for audio transcriptions.
When the mode is enabled, the buffers \"Statistics Output\" and
\"Raw Output\" are created unless they already exist."
  :init-value nil
  :lighter " Trans"
  ;; No :keymap argument is needed: `define-minor-mode' uses the variable
  ;; `transcribe-mode-map' automatically when it is bound.
  (when transcribe-mode
    ;; The body also runs when the mode is switched off, and
    ;; `generate-new-buffer' would add a numbered duplicate on every
    ;; toggle; `get-buffer-create' reuses the existing output buffers.
    (get-buffer-create "Statistics Output")
    (get-buffer-create "Raw Output"))
  ;; TODO: save the students present in transcription in list so that we
  ;; can use that list for transcribe-analyze-all
  )
399
400 (provide 'transcribe)
401
402 ;;; transcribe.el ends here