]> code.delx.au - gnu-emacs-elpa/blob - packages/transcribe/transcribe.el
* packages/transcribe: New release: new functions and menus
[gnu-emacs-elpa] / packages / transcribe / transcribe.el
1 ;;; transcribe.el --- Package for audio transcriptions
2
3 ;; Copyright 2014-2015 Free Software Foundation, Inc.
4
5 ;; Author: David Gonzalez Gandara <dggandara@member.fsf.org>
6 ;; Version: 1.3.0
7
8 ;; This program is free software: you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation, either version 3 of the License, or
11 ;; (at your option) any later version.
12 ;;
13 ;; This program is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
17 ;;
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21 ;;; Commentary:
22
23 ;; REQUIRES:
24 ;; -----------------------------
25 ;; This module works without any requires, but in order to use the audio
26 ;; functions, you need to install the emacs package "emms", by Joe Drew,
27 ;; and the external program "mpg321", by Jorgen Schafer and Ulrik Jensen,
28 ;; both under GPL licenses.
29 ;;
30 ;; USAGE:
31 ;; -------------------------
32 ;; Transcribe is a tool to make audio transcriptions for discourse analysis
33 ;; in the classroom.
34 ;; It allows the transcriber to control the audio easily while typing, as well as
35 ;; automate the insertion of xml tags, in case the transcription protocol
36 ;; include them.
37 ;; The analysis functions will search for a specific structure
38 ;; of episodes that can be automatically added with the macro NewEpisode.
39 ;; The function expects the speech acts to be transcribed inside a turn xml
40 ;; tag with the identifier of the speaker with optional move attribute.
41 ;; Each speech act is spected inside a <l1> or <l2> tag, depending
42 ;; on the language used by the person. The attributes expected are the
43 ;; number of clauses that form the utterance, the number of errors the
44 ;; transcriber observes, and the function of the speech act. The parser will
45 ;; work even if some attributes are missing.
46 ;;
47 ;;
48 ;; AUDIO COMMANDS
49 ;; ------------------------------
50 ;; C-x C-p ------> Play audio file. You will be prompted for the name
51 ;; of the file. The recommended format is mp2.
52 ;; <f5> ---------> Pause or play audio.
53 ;; C-x <right> --> seek audio 10 seconds forward.
54 ;; C-x <left> --->seek audio 10 seconds backward.
55 ;; <f8> ---------> seek interactively: positive seconds go forward and
56 ;; negative seconds go backward
57 ;;
58 ;; XML TAGGING COMMANDS
59 ;; --------------------------------------------------
60 ;; C-x C-n ------> Create new episode structure. This is useful in case your
61 ;; xml file structure requires it.
62 ;; <f2> ---------> Interactively insert a function attribute in a speech act
63 ;; (l1 or l2) tag.
64 ;; <f3> ---------> Interactively insert a move attribute in a turn (person) tag
65 ;; <f4> ---------> Interactively insert an attribute (any kind)
66 ;; <f9> ---------> Insert turn (person) tag. Inserts a move attribute.
67 ;; <f10> --------> Insert a custom tag. Edit the function to adapt to your needs.
68 ;; <f11> --------> Insert speech act tag in L1, with clauses, errors and function
69 ;; attributes.
70 ;; <f12> --------> Insert speech act tag in L2, with clauses, errors and function
71 ;; attributes.
72 ;;
73 ;; AUTOMATIC PARSING
74 ;; -----------------------------------------------------
75 ;; C-x C-a ------> Analyses the text for measurments of performance.
76
77 ;;; Code:
78
79 (if t (require 'emms-setup))
80 ;(require 'emms-player-mpd)
81 ;(setq emms-player-mpd-server-name "localhost")
82 ;(setq emms-player-mpd-server-port "6600")
83
84 (emms-standard)
85 (emms-default-players)
86 (if t (require 'emms-player-mpg321-remote))
87 (defvar emms-player-list)
88 (push 'emms-player-mpg321-remote emms-player-list)
89
90 (if t (require 'emms-mode-line))
91 (emms-mode-line 1)
92 (if t (require 'emms-playing-time))
93 (emms-playing-time 1)
94
95 (defvar transcribe-function-list '("initiating" "responding" "control" "expresive" "interpersonal"))
96 (defvar transcribe-move-list '("initiation" "response" "follow-up"))
97 (defvar transcribe-attribute-list '("clauses" "errors" "function" "move"))
98 ;(append transcribe-attribute-list transcribe-function-list transcribe-move-list)
99
100 (defun transcribe-analyze-episode (episode person)
101 "This calls the external python package analyze_episodes2.py. The new
102 function transcribe-analyze implements its role now."
103 (interactive "sepisode: \nsperson:")
104 (shell-command (concat (expand-file-name "analyze_episodes2.py")
105 " -e " episode " -p " person " -i " buffer-file-name )))
106
107 (defun transcribe-raw-to-buffer ()
108 "EXPERIMENTAL - Convert the xml tagged transcription to raw transcription, with the names
109 and the persons and the utterances only. The raw transcription will be send to buffer called
110 'Raw Output'"
111 (interactive)
112 (let* ((interventionsl2 '())
113 (interventionsl1 '())
114 (xml (xml-parse-region (point-min) (point-max)))
115 (results (car xml))
116 (episodes (xml-get-children results 'episode))
117 (clausesmessage nil)
118 (number nil))
119
120 (dolist (episode episodes)
121 (let* ((transcription (xml-get-children episode 'transcription))
122 (participantsnode (xml-get-children episode 'participants))
123 (participantsstring (nth 2 (car participantsnode)))
124 (participants (split-string participantsstring)))
125
126 (dolist (turn transcription)
127 (dolist (intervention (xml-node-children turn))
128 (when (listp intervention)
129 (save-excursion
130 (set-buffer "Raw Output")
131 (insert (format "%s: " (car intervention)))
132 (dolist (utterance (nthcdr 2 intervention))
133 (when (listp utterance)
134 (insert (format "%s " (nth 2 utterance)))))
135 (insert "\n")))))))))
136
137 (defun transcribe-analyze (episodenumber personid)
138 "Extract from a given episode and person the number of asunits per
139 second produced, and the number of clauses per asunits, for L2 and L1.
140 It writes two output files, one for L2 utterances and one for L1
141 utterances, so that they can be used with external programs. Output will
142 be inserted in 'Statistics Output' buffer"
143 (interactive "sepisodenumber: \nspersonid:")
144 (let* ((interventionsl2 '())
145 (interventionsl1 '())
146 (xml (xml-parse-region (point-min) (point-max)))
147 (results (car xml))
148 (episodes (xml-get-children results 'episode))
149 (asunitsl2 0.0000)
150 (asunitsl1 0.0000)
151 (shifts 0.0000);; TODO implement
152 (initiating 0.0000);; TODO implement
153 (responding 0.0000);; TODO implement
154 (control 0.0000);; TODO implement
155 (expressive 0.0000);; TODO implement
156 (interpersonal 0.0000);; TODO implement
157 (clausesl1 0.0000)
158 (errorsl1 0.0000);; TODO implement
159 (clausesl2 0.0000)
160 (errorsl2 0.0000)
161 (duration nil)
162 (role nil)
163 (context nil)
164 (demand nil)
165 (clausesmessage nil)
166 (number nil))
167
168 (dolist (episode episodes)
169 (let*((numbernode (xml-get-children episode 'number))
170 (tasknode (xml-get-children episode 'task)))
171
172 (setq number (nth 2 (car numbernode)))
173 (when (equal episodenumber number)
174 (let* ((durationnode (xml-get-children episode 'duration))
175 (transcription (xml-get-children episode 'transcription)))
176
177 (setq duration (nth 2 (car durationnode)))
178
179 (dolist (task tasknode)
180 (let* ((rolenode (xml-get-children task 'role))
181 (contextnode (xml-get-children task 'context))
182 (demandnode (xml-get-children task 'demand)))
183
184 (setq role (nth 2 (car rolenode)))
185 (setq context (nth 2 (car contextnode)))
186 (setq demand (nth 2 (car demandnode)))
187 ;; (save-excursion
188 ;; (set-buffer "Statistics Output")
189 ;; (insert (format "role: %s; context: %s; demand: %s\n" role context demand)))
190 ))
191
192 (dolist (turn transcription)
193 (let* ((interventionnode (xml-get-children turn
194 (intern personid))))
195
196 (dolist (intervention interventionnode)
197 (let* ((l2node (xml-get-children intervention 'l2))
198 (l1node (xml-get-children intervention 'l1)))
199
200 (dolist (l2turn l2node)
201 (let* ((l2 (nth 2 l2turn))
202 (attrs (nth 1 l2turn))
203 (clausesl2nodeinc (cdr (assq 'clauses attrs)))
204 (errorsl2inc (cdr (assq 'errors attrs)))
205 (function (cdr (assq 'function attrs))))
206
207 (when (string-equal function "initiating")
208 (setq initiating (+ initiating 1)))
209 (when (string-equal function "responding")
210 (setq responding (+ responding 1)))
211 (when (string-equal function "control")
212 (setq control (+ control 1)))
213 (when (string-equal function "expressive")
214 (setq expressive (+ expressive 1)))
215 (when (string-equal function "interpersonal")
216 (setq interpersonal (+ interpersonal 1)))
217 (when (not (equal attrs nil))
218 (setq clausesl2 (+ clausesl2 (string-to-number
219 clausesl2nodeinc)))
220 (setq errorsl2 (+ errorsl2 (string-to-number
221 errorsl2inc))))
222 (when (not (equal l2 nil))
223 (add-to-list 'interventionsl2 l2)
224 (setq asunitsl2 (1+ asunitsl2)))))
225 (dolist (l1turn l1node)
226 (let*((l1 (nth 2 l1turn))
227 (clausesl1node (nth 1 l1turn))
228 (clausesl1nodeinc (cdr (car clausesl1node))))
229
230 (when (not (equal clausesl1node nil))
231 (setq clausesl1 (+ clausesl1 (string-to-number
232 clausesl1nodeinc))))
233 (when (not (equal l1 nil))
234 (add-to-list 'interventionsl1 l1)
235 (setq asunitsl1 (1+ asunitsl1)))))))))))))
236 (reverse interventionsl2)
237 ;; (write-region (format "%s" interventionsl2) nil (format "transcribe-output-%s-%s-l2.txt" episodenumber personid))
238 ;; Write raw interventions to file will be supported by a different function
239 (reverse interventionsl1)
240 ;; (write-region (format "%s" interventionsl1) nil (format "transcribe-output-%s-%s-l1.txt" episodenumber personid))
241 ;; (print interventionsl2) ;uncomment to display all the interventions on screen
242 (let((asunitspersecondl2 (/ asunitsl2 (string-to-number duration)))
243 (clausesperasunitl2 (/ clausesl2 asunitsl2))
244 (errorsperasunitl2 (/ errorsl2 asunitsl2))
245 (asunitspersecondl1 (/ asunitsl1 (string-to-number duration)))
246 (clausesperasunitl1 (/ clausesl1 asunitsl1))
247 (initiatingperasunitl2 (/ initiating asunitsl2))
248 (respondingperasunitl2 (/ responding asunitsl2))
249 (controlperasunitl2 (/ control asunitsl2))
250 (expressiveperasunitl2 (/ expressive asunitsl2))
251 (interpersonalperasunitl2 (/ interpersonal asunitsl2)))
252
253 ;; (princ clausesmessage)
254 (princ (format "episode: %s, duration: %s, person: %s\n" episodenumber duration personid))
255 (save-excursion
256 (set-buffer "Statistics Output")
257 (insert (format "%s,%s,%s,0,0,%s,%s,%s,%s,%s,QUAN-L2,segmented,aux,level,subject,yearofclil,month\n" personid episodenumber duration role context demand asunitspersecondl2 asunitspersecondl1))
258 )
259 (princ (format "L2(Asunits/second): %s, L2(clauses/Asunit): %s, L2(errors/Asunit):%s, L1(Asunits/second): %s\n"
260 asunitspersecondl2 clausesperasunitl2 errorsperasunitl2 asunitspersecondl1))
261 (princ (format "Functions/unit: Initiating: %s, Responding: %s, Control: %s, Expressive: %s, Interpersonal: %s" initiatingperasunitl2 respondingperasunitl2 controlperasunitl2 expressiveperasunitl2 interpersonalperasunitl2)))))
262
263 (defun transcribe-analyze-all ()
264 "Analyze all file and output to 'Statistics Output' buffer. The buffer will
265 lost all previous data. The data in the buffer can be saved to a file and be
266 passed to 'R' for statistical analysis."
267 (interactive)
268 (let* (
269 (xml (xml-parse-region (point-min) (point-max)))
270 (results (car xml))
271 (episodes (xml-get-children results 'episode)))
272
273 (save-excursion
274 (set-buffer "Statistics Output")
275 (erase-buffer)
276 (insert "person,episode,duration,C-UNITS(L2),C-UNITS(L1),role,context,demand,QUAN-L2,QUAN-L1,QUAL-L2,segmented,aux,level,subjects,yearofCLIL,month\n"))
277 (dolist (episode episodes)
278 (let* ((numbernode (xml-get-children episode 'number))
279 (participantsnode (xml-get-children episode 'participants))
280 (transcription (xml-get-children episode 'transcription))
281 (number (nth 2 (car numbernode)))
282 (participantsstring (nth 2 (car participantsnode)))
283 (participants (split-string participantsstring)))
284
285 (dolist (participant participants)
286 (transcribe-analyze number participant))))))
287
288
289 (defun transcribe-xml-tag-person (xmltag)
290 "This function allows the automatic insetion of a speaker xml tag and places the cursor."
291 (interactive "stag:")
292 (insert (format "<%s move=\"\"></%s>" xmltag xmltag))
293 (backward-char 3)
294 (backward-char (string-width xmltag)))
295
296 (defun transcribe-xml-tag (xmltag)
297 "This function allows the automatic insetion of a custom xml tag and places the cursor."
298 (interactive "stag:")
299 (insert (format "<%s></%s>" xmltag xmltag))
300 (backward-char 3)
301 (backward-char (string-width xmltag)))
302
303 (defun transcribe-region-xml-tag (xmltag)
304 "This function encapsulates the marked region in the given tag."
305 (interactive "stag:")
306 (let ((beginning (region-beginning))
307 (end (region-end)))
308 (goto-char beginning)
309 (insert (format "<%s>" xmltag))
310 (goto-char end)
311 (insert (format "</%s>" xmltag))))
312
313 (defun transcribe-add-attribute (att val)
314 "Adds a xml attribute at cursor with the name and value specified (autocompletion possible)"
315 (interactive (list(completing-read "attibute name:" transcribe-attribute-list)(read-string "value:")))
316 (insert (format "%s=\"%s\"" att val)))
317
318 (defun transcribe-add-attribute-function (val)
319 "Adds the xml attribute 'function' at cursor with the name specified (autocompletion possible)"
320 (interactive (list(completing-read "function name:" transcribe-function-list)))
321 (insert (format "function=\"%s\"" val)))
322
323 (defun transcribe-add-attribute-move (val)
324 "Adds the xml attribute 'move' at cursor with the name specified (autocompletion possible"
325 (interactive (list(completing-read "move name:" transcribe-move-list)))
326 (insert (format "move=\"%s\"" val)))
327
328 (defun transcribe-xml-tag-l1 ()
329 "Inserts a l1 tag and places the cursor"
330 (interactive)
331 (insert "<l1 clauses=\"1\" errors=\"0\" function=\"\"></l1>")
332 (backward-char 5))
333
334 (defun transcribe-xml-tag-l2 ()
335 "Inserts a l2 tag and places the cursor"
336 (interactive)
337 (insert "<l2 clauses=\"1\" errors=\"0\" function=\"\"></l2>")
338 (backward-char 5))
339
340 (defun transcribe-xml-tag-break (xmltag)
341 "This function breaks an unit into two. That is, insert a closing and an opening equal tags"
342 (interactive "stag:")
343 (insert (format "</%s><%s>" xmltag xmltag)))
344
345 (defun transcribe-display-audio-info ()
346 (interactive)
347 (emms-player-mpg321-remote-proc)
348 (shell-command "/usr/bin/mpg321 -R - &"))
349
350
351 (fset 'NewEpisode
352 "<episode>\n<number>DATE-NUMBER</number>\n<duration></duration>\n<comment></comment>\n<subject>Subject (level)</subject>\n<participants><\participants>\n<task>\n\t<role>low or high</role>\n<context>low or high</context>\n<demand>low or high</demand>\r</task>\n<auxiliar>Yes/no</auxiliar>\n<transcription>\n</transcription>\n</episode>");Inserts a new episode structure
353
354
355 (defvar transcribe-mode-map
356 (let ((map (make-sparse-keymap)))
357 (define-key map (kbd "C-x C-a") 'transcribe-analyze)
358 map)
359 "Keymap for Transcribe minor mode.")
360
361
362 (easy-menu-define transcribe-mode-menu transcribe-mode-map
363 "Menu for Transcribe mode"
364 '("Transcribe"
365 ["Raw Output" transcribe-raw-to-buffer]
366 "---"
367 ["Analyze" transcribe-analyze]
368 ["Analyze all" arbitools-analyze-all]
369 ))
370
371
372 ;;;###autoload
373 (define-minor-mode transcribe-mode
374 "Toggle transcribe-mode"
375 nil
376 " Trans"
377 '(([?\C-x ?\C-p] . emms-play-file)
378 ([?\C-x ?\C-a] . transcribe-analyze)
379 ([?\C-x ?\C-n] . NewEpisode)
380 ([?\C-x down] . emms-stop)
381 ([?\C-x right] . emms-seek-forward)
382 ([?\C-x left] . emms-seek-backward)
383
384 ([f2] . transcribe-add-attribute-function)
385 ([f3] . transcribe-add-attribute-move)
386 ([f4] . transcribe-add-attribute)
387
388 ([f5] . emms-pause)
389 ([f8] . emms-seek)
390
391 ([f9] . transcribe-xml-tag)
392 ([f10] . transcribe-xml-tag-person)
393 ([f11] . transcribe-xml-tag-l1)
394 ([f12] . transcribe-xml-tag-l2))
395 (generate-new-buffer "Statistics Output")
396 (generate-new-buffer "Raw Output")
397 (save-excursion
398 (set-buffer "Statistics Output")
399 ;; (insert "person,episode,duration,C-UNITS(L2),C-UNITS(L1),role,context,demand,QUAN-L2,QUAN-L1,QUAL-L2,segmented,aux,level,subjects,yearofCLIL,month\n")
400 )
401 ;; TODO: save the students present in transcription in list so that we can use that list for transcribe-analyze-all
402 )
403
404 (provide 'transcribe)
405
406 ;;; transcribe.el ends here