/***
  Source listing: code.delx.au - pulseaudio/blob - src/pulsecore/svolume_mmx.c

  This file is part of PulseAudio.

  Copyright 2004-2006 Lennart Poettering
  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>

  PulseAudio is free software; you can redistribute it and/or modify
  it under the terms of the GNU Lesser General Public License as published
  by the Free Software Foundation; either version 2.1 of the License,
  or (at your option) any later version.

  PulseAudio is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with PulseAudio; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
***/
29 #include <pulsecore/random.h>
30 #include <pulsecore/macro.h>
31 #include <pulsecore/g711.h>
32 #include <pulsecore/core-util.h>
34 #include "sample-util.h"
35 #include "endianmacros.h"
/*
 * Scale unsigned 8-bit samples in place, cycling through one volume per
 * channel.
 *
 *   samples   buffer of unsigned 8-bit samples, modified in place
 *   volumes   one 32-bit volume per channel; the code uses it as a
 *             fixed-point factor with 16 fractional bits
 *   channels  number of entries of `volumes` to cycle through
 *   length    number of samples to process
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the local declarations and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_u8_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
  unsigned channel;

  for (channel = 0; length; length--) {
    int32_t t, hi, lo;

    /* split the volume into its integer (hi) and fractional (lo) halves so
     * each partial product stays within 32 bits */
    hi = volumes[channel] >> 16;
    lo = volumes[channel] & 0xFFFF;

    /* re-centre the unsigned sample around zero before scaling */
    t = (int32_t) *samples - 0x80;
    t = ((t * lo) >> 16) + (t * hi);
    t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
    *samples++ = (uint8_t) (t + 0x80);

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Scale A-law encoded samples in place, cycling through one volume per
 * channel. Each byte is decoded with st_alaw2linear16(), scaled, clamped to
 * the signed 16-bit range and re-encoded with st_13linear2alaw().
 *
 *   samples   buffer of A-law bytes, modified in place
 *   volumes   one 32-bit volume per channel; the code uses it as a
 *             fixed-point factor with 16 fractional bits
 *   channels  number of entries of `volumes` to cycle through
 *   length    number of samples (bytes) to process
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the local declarations and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_alaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
  unsigned channel;

  for (channel = 0; length; length--) {
    int32_t t, hi, lo;

    /* integer (hi) and fractional (lo) halves of the volume */
    hi = volumes[channel] >> 16;
    lo = volumes[channel] & 0xFFFF;

    t = (int32_t) st_alaw2linear16(*samples);
    t = ((t * lo) >> 16) + (t * hi);
    t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
    /* the encoder is fed the 16-bit value shifted right by 3 */
    *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Scale u-law encoded samples in place, cycling through one volume per
 * channel. Each byte is decoded with st_ulaw2linear16(), scaled, clamped to
 * the signed 16-bit range and re-encoded with st_14linear2ulaw().
 *
 *   samples   buffer of u-law bytes, modified in place
 *   volumes   one 32-bit volume per channel; the code uses it as a
 *             fixed-point factor with 16 fractional bits
 *   channels  number of entries of `volumes` to cycle through
 *   length    number of samples (bytes) to process
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the local declarations and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
  unsigned channel;

  for (channel = 0; length; length--) {
    int32_t t, hi, lo;

    /* integer (hi) and fractional (lo) halves of the volume */
    hi = volumes[channel] >> 16;
    lo = volumes[channel] & 0xFFFF;

    t = (int32_t) st_ulaw2linear16(*samples);
    t = ((t * lo) >> 16) + (t * hi);
    t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
    /* the encoder is fed the 16-bit value shifted right by 2 */
    *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Scales 16-bit samples in place with hand-written MMX inline assembly
 * (samples are read from and written back through operand %0).
 * This is the only routine in the visible part of the file that is
 * registered with pa_set_volume_func().
 *
 * Asm operands, as listed at the end of the statement:
 *   %0 samples, %1 volumes, %2 length, %3 channel (constraint "=D"),
 *   %4 temp, %5 channels (input).
 *
 * NOTE(review): this listing is extraction-damaged. Original line numbers
 * are fused into the text, tokens are split across lines, and the gaps in
 * those numbers show that lines are missing (the return type, braces, jump
 * and label lines, pointer/loop-control instructions, and everything after
 * the input operand list). Restore from upstream before compiling.
 */
103 pa_volume_s16ne_mmx (int16_t *samples
, int32_t *volumes
, unsigned channels
, unsigned length
)
105 int64_t channel
, temp
;
107 /* the max number of samples we process at a time */
/* NOTE(review): MAX (4, channels) yields at least 4, so `channels` is raised
 * to 4 when it is smaller. The 4-sample loop below loads four consecutive
 * volumes starting at index %3; presumably the caller pads the volumes
 * array accordingly -- confirm. */
108 channels
= MAX (4, channels
);
/* VOLUME_32x16(s, v): multiplies the 16-bit samples held in MMX register `s`
 * by the 32-bit volumes held in `v`. The high and low volume halves are
 * handled separately (pmulhuw with a sign correction, then pmaddwd) and the
 * sum is packed back to 16 bits with packssdw, leaving the result in `v`.
 * Uses %mm4 and %mm5 as scratch registers. */
110 #define VOLUME_32x16(s,v) /* v1_h | v1_l | v0_h | v0_l */ \
111 " pxor %%mm4, %%mm4 \n\t" \
112 " punpcklwd %%mm4, "#s" \n\t" /* 0 | p1 | 0 | p0 */ \
113 " pcmpgtw "#s", %%mm4 \n\t" /* select sign from sample */ \
114 " pand "#v", %%mm4 \n\t" /* extract correction factors */ \
115 " movq "#s", %%mm5 \n\t" \
116 " pmulhuw "#v", "#s" \n\t" /* 0 | p1*v1lh | 0 | p0*v0lh */ \
117 " psubd %%mm4, "#s" \n\t" /* sign correction */ \
118 " psrld $16, "#v" \n\t" /* 0 | v1h | 0 | v0h */ \
119 " pmaddwd %%mm5, "#v" \n\t" /* p1 * v1h | p0 * v0h */ \
120 " paddd "#s", "#v" \n\t" /* p1 * v1 | p0 * v0 */ \
121 " packssdw "#v", "#v" \n\t" /* p0*v0 | p1*v1 | p0*v0 | p1*v1 */
/* MOD_ADD(a, b): adds `a` to the channel index (%3), then compares it with
 * `b` and conditionally moves %4 into %3 (cmovae).
 * NOTE(review): original lines 125-126 are missing from this listing;
 * presumably they prepare %4 for the wrap-around -- confirm. */
123 #define MOD_ADD(a,b) \
124 " add "#a", %3 \n\t" \
127 " cmp %3, "#b" \n\t" \
128 " cmovae %4, %3 \n\t"
130 __asm__
__volatile__ (
132 " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
134 " test $1, %2 \n\t" /* check for odd samples */
/* single-sample path: one volume is loaded with movd and one 16-bit sample
 * is moved through %ax, scaled, and stored back as 16 bits */
137 " movd (%1, %3, 4), %%mm0 \n\t" /* do odd samples */
138 " movw (%0), %%ax \n\t"
139 " movd %%eax, %%mm1 \n\t"
140 VOLUME_32x16 (%%mm1
, %%mm0
)
141 " movd %%mm0, %%eax \n\t"
142 " movw %%ax, (%0) \n\t"
148 " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
149 " test $1, %2 \n\t" /* check for odd samples */
/* two-sample path: movq loads two volumes, movd loads and stores two samples */
152 "3: \n\t" /* do samples in pairs of 2 */
153 " movq (%1, %3, 4), %%mm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
154 " movd (%0), %%mm1 \n\t" /* X | X | p1 | p0 */
155 VOLUME_32x16 (%%mm1
, %%mm0
)
156 " movd %%mm0, (%0) \n\t"
162 " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
/* four-sample path: two movq loads fetch four volumes (offsets 0 and 8) and
 * two movd loads fetch four samples (offsets 0 and 4) */
166 "5: \n\t" /* do samples in pairs of 4 */
167 " movq (%1, %3, 4), %%mm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
168 " movq 8(%1, %3, 4), %%mm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */
169 " movd (%0), %%mm1 \n\t" /* X | X | p1 | p0 */
170 " movd 4(%0), %%mm3 \n\t" /* X | X | p3 | p2 */
171 VOLUME_32x16 (%%mm1
, %%mm0
)
172 VOLUME_32x16 (%%mm3
, %%mm2
)
173 " movd %%mm0, (%0) \n\t"
174 " movd %%mm2, 4(%0) \n\t"
/* output operands %0..%4, then the single input operand %5 */
183 : "+r" (samples
), "+r" (volumes
), "+r" (length
), "=D" ((int64_t)channel
), "=r" (temp
)
184 : "r" ((int64_t)channels
)
/*
 * Scale 16-bit samples that need a byte swap (PA_INT16_SWAP) in place,
 * cycling through one volume per channel.
 *
 *   samples   buffer of 16-bit samples, modified in place
 *   volumes   one 32-bit volume per channel; the code uses it as a
 *             fixed-point factor with 16 fractional bits
 *   channels  number of entries of `volumes` to cycle through
 *   length    size of the buffer in bytes
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the local declarations and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
  unsigned channel;

  /* bytes -> number of samples */
  length /= sizeof (int16_t);

  for (channel = 0; length; length--) {
    int32_t t, hi, lo;

    /* integer (hi) and fractional (lo) halves of the volume */
    hi = volumes[channel] >> 16;
    lo = volumes[channel] & 0xFFFF;

    t = (int32_t) PA_INT16_SWAP(*samples);
    t = ((t * lo) >> 16) + (t * hi);
    t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
    *samples++ = PA_INT16_SWAP((int16_t) t);

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Multiply float samples in place by a per-channel float volume.
 *
 *   samples   buffer of float samples, modified in place
 *   volumes   one float factor per channel
 *   channels  number of entries of `volumes` to cycle through
 *   length    size of the buffer in bytes
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the `channel` declaration and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_float32ne_mmx (float *samples, float *volumes, unsigned channels, unsigned length)
{
  unsigned channel;

  /* bytes -> number of samples */
  length /= sizeof (float);

  for (channel = 0; length; length--) {
    *samples++ *= volumes[channel];

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Multiply float samples that need a byte swap (PA_FLOAT32_SWAP) in place by
 * a per-channel float volume. Each sample is swapped, scaled, and swapped
 * back before being stored.
 *
 *   samples   buffer of float samples, modified in place
 *   volumes   one float factor per channel
 *   channels  number of entries of `volumes` to cycle through
 *   length    size of the buffer in bytes
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the local declarations and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_float32re_mmx (float *samples, float *volumes, unsigned channels, unsigned length)
{
  unsigned channel;

  /* bytes -> number of samples */
  length /= sizeof (float);

  for (channel = 0; length; length--) {
    float t;

    t = PA_FLOAT32_SWAP(*samples);
    t *= volumes[channel];
    *samples++ = PA_FLOAT32_SWAP(t);

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Scale signed 32-bit samples in place, cycling through one volume per
 * channel. The product is computed in 64 bits, shifted right by 16 and
 * clamped to the signed 32-bit range.
 *
 *   samples   buffer of 32-bit samples, modified in place
 *   volumes   one 32-bit volume per channel; the code uses it as a
 *             fixed-point factor with 16 fractional bits
 *   channels  number of entries of `volumes` to cycle through
 *   length    size of the buffer in bytes
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the local declarations and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_s32ne_mmx (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
  unsigned channel;

  /* bytes -> number of samples */
  length /= sizeof (int32_t);

  for (channel = 0; length; length--) {
    int64_t t;

    /* widen first so the multiplication cannot overflow */
    t = (int64_t)(*samples);
    t = (t * volumes[channel]) >> 16;
    t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
    *samples++ = (int32_t) t;

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Scale signed 32-bit samples that need a byte swap (PA_INT32_SWAP) in
 * place, cycling through one volume per channel. The product is computed in
 * 64 bits, shifted right by 16, clamped to the signed 32-bit range and
 * swapped back before being stored.
 *
 *   samples   buffer of 32-bit samples, modified in place
 *   volumes   one 32-bit volume per channel; the code uses it as a
 *             fixed-point factor with 16 fractional bits
 *   channels  number of entries of `volumes` to cycle through
 *   length    size of the buffer in bytes
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the local declarations and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_s32re_mmx (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
  unsigned channel;

  /* bytes -> number of samples */
  length /= sizeof (int32_t);

  for (channel = 0; length; length--) {
    int64_t t;

    /* widen first so the multiplication cannot overflow */
    t = (int64_t) PA_INT32_SWAP(*samples);
    t = (t * volumes[channel]) >> 16;
    t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
    *samples++ = PA_INT32_SWAP((int32_t) t);

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Scale packed 24-bit samples (3 bytes each, accessed through PA_READ24NE /
 * PA_WRITE24NE) in place, cycling through one volume per channel.
 *
 *   samples   byte buffer holding the samples, modified in place
 *   volumes   one 32-bit volume per channel; the code uses it as a
 *             fixed-point factor with 16 fractional bits
 *   channels  number of entries of `volumes` to cycle through
 *   length    size of the buffer in bytes; the loop advances 3 bytes at a
 *             time until it reaches samples + length
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the local declarations and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_s24ne_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
  unsigned channel;
  uint8_t *e;

  e = samples + length;

  for (channel = 0; samples < e; samples += 3) {
    int64_t t;

    /* move the 24-bit value into the top of a 32-bit word so that the cast
     * to int32_t gives it a sign, then widen for the multiplication */
    t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
    t = (t * volumes[channel]) >> 16;
    t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
    /* drop the low 8 bits again before storing 24 bits */
    PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Scale packed 24-bit samples (3 bytes each, accessed through PA_READ24RE /
 * PA_WRITE24RE) in place, cycling through one volume per channel.
 *
 *   samples   byte buffer holding the samples, modified in place
 *   volumes   one 32-bit volume per channel; the code uses it as a
 *             fixed-point factor with 16 fractional bits
 *   channels  number of entries of `volumes` to cycle through
 *   length    size of the buffer in bytes; the loop advances 3 bytes at a
 *             time until it reaches samples + length
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the local declarations and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_s24re_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
  unsigned channel;
  uint8_t *e;

  e = samples + length;

  for (channel = 0; samples < e; samples += 3) {
    int64_t t;

    /* move the 24-bit value into the top of a 32-bit word so that the cast
     * to int32_t gives it a sign, then widen for the multiplication */
    t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
    t = (t * volumes[channel]) >> 16;
    t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
    /* drop the low 8 bits again before storing 24 bits */
    PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Scale 24-bit samples stored in 32-bit words in place, cycling through one
 * volume per channel. Each word is shifted left by 8 before scaling and the
 * result is shifted right by 8 before it is stored.
 *
 *   samples   buffer of 32-bit words, modified in place
 *   volumes   one 32-bit volume per channel; the code uses it as a
 *             fixed-point factor with 16 fractional bits
 *   channels  number of entries of `volumes` to cycle through
 *   length    size of the buffer in bytes
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the local declarations and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_s24_32ne_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
  unsigned channel;

  /* bytes -> number of samples */
  length /= sizeof (uint32_t);

  for (channel = 0; length; length--) {
    int64_t t;

    /* shift into the top of the word so the int32_t cast gives it a sign */
    t = (int64_t) ((int32_t) (*samples << 8));
    t = (t * volumes[channel]) >> 16;
    t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
    *samples++ = ((uint32_t) ((int32_t) t)) >> 8;

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Scale 24-bit samples stored in byte-swapped 32-bit words
 * (PA_UINT32_SWAP) in place, cycling through one volume per channel. Each
 * word is swapped and shifted left by 8 before scaling; the result is
 * shifted right by 8 and swapped back before it is stored.
 *
 *   samples   buffer of 32-bit words, modified in place
 *   volumes   one 32-bit volume per channel; the code uses it as a
 *             fixed-point factor with 16 fractional bits
 *   channels  number of entries of `volumes` to cycle through
 *   length    size of the buffer in bytes
 *
 * NOTE(review): this block was restored from an extraction-damaged listing.
 * The `static void` line, the braces, the local declarations and the
 * `channel = 0` reset were not visible and are reconstructed from context --
 * confirm against upstream.
 */
static void
pa_volume_s24_32re_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
  unsigned channel;

  /* bytes -> number of samples */
  length /= sizeof (uint32_t);

  for (channel = 0; length; length--) {
    int64_t t;

    /* shift into the top of the word so the int32_t cast gives it a sign */
    t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
    t = (t * volumes[channel]) >> 16;
    t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
    *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);

    /* wrap around to the first channel's volume */
    if (PA_UNLIKELY(++channel >= channels)) {
      channel = 0;
    }
  }
}
/*
 * Self-test for pa_volume_s16ne_mmx(): runs it and the function returned by
 * pa_get_volume_func (PA_SAMPLE_S16NE) over identical random input with the
 * same volumes and prints a line for samples whose results differ.
 *
 * SAMPLES, CHANNELS and TIMES, as well as the counters `i` and `j`, are
 * defined outside the visible lines.
 *
 * NOTE(review): this listing is extraction-damaged. Original line numbers
 * are fused into the text, tokens are split across lines, and several lines
 * (declarations, closing braces, whatever governs the second printf) are
 * missing. Restore from upstream before compiling.
 */
377 static void run_test (void) {
378 int16_t samples
[SAMPLES
];
379 int16_t samples_ref
[SAMPLES
];
380 int16_t samples_orig
[SAMPLES
];
/* NOTE(review): the 4-sample loop of pa_volume_s16ne_mmx loads four
 * consecutive volumes; whether CHANNELS entries are enough here cannot be
 * told from the visible lines -- confirm. */
381 int32_t volumes
[CHANNELS
];
383 pa_do_volume_func_t func
;
/* implementation to compare against */
385 func
= pa_get_volume_func (PA_SAMPLE_S16NE
);
387 printf ("checking\n");
389 for (j
= 0; j
< TIMES
; j
++) {
/* fresh random input each round; samples_ref is the input for `func`,
 * samples_orig is kept untouched for the report */
390 pa_random (samples
, sizeof (samples
));
391 memcpy (samples_ref
, samples
, sizeof (samples
));
392 memcpy (samples_orig
, samples
, sizeof (samples
));
394 for (i
= 0; i
< CHANNELS
; i
++) {
395 volumes
[i
] = rand() >> 15;
/* both implementations get the buffer size in bytes */
398 pa_volume_s16ne_mmx (samples
, volumes
, CHANNELS
, SAMPLES
* sizeof (int16_t));
399 func (samples_ref
, volumes
, CHANNELS
, SAMPLES
* sizeof (int16_t));
/* compare sample by sample and report differences */
401 for (i
= 0; i
< SAMPLES
; i
++) {
402 if (samples
[i
] != samples_ref
[i
]) {
403 printf ("%d: %04x != %04x (%04x * %04x)\n", i
, samples
[i
], samples_ref
[i
],
404 samples_orig
[i
], volumes
[i
% CHANNELS
]);
/* NOTE(review): the lines that decide when this "==" report runs
 * (original lines 405-407) are not visible */
408 printf ("%d: %04x == %04x (%04x * %04x)\n", i
, samples
[i
], samples_ref
[i
],
409 samples_orig
[i
], volumes
[i
% CHANNELS
]);
/*
 * Logs that the MMX routines are being initialised and registers
 * pa_volume_s16ne_mmx as the volume function for PA_SAMPLE_S16NE through
 * pa_set_volume_func().
 *
 * NOTE(review): this listing is extraction-damaged. Original line numbers
 * are fused into the text, original lines 418-422 and the closing brace are
 * missing. Restore from upstream before compiling.
 */
416 void pa_volume_func_init_mmx (void) {
417 pa_log_info("Initialising MMX optimized functions.");
/* the cast adapts the typed signature to the generic function pointer type */
423 pa_set_volume_func (PA_SAMPLE_S16NE
, (pa_do_volume_func_t
) pa_volume_s16ne_mmx
);