前言

流媒体在网络传输中可能发生丢包。对于音频,丢包会导致解码后的声音听起来卡顿、不连续。lowcFE 是一种丢包隐藏(PLC,Packet Loss Concealment)算法:在发生丢包时,它根据此前收到的语音历史估计丢失的内容,合成一段数据来填补丢失的帧。

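lowcFE 全称为 Low Complexity Frame Erasure(低复杂度帧擦除),对应 ITU-T G.711 附录 I 的 PLC 模块。之所以叫"擦除"而不是"恢复",是因为 "frame erasure" 指的是帧在传输中丢失(被擦除)这一事件;算法做的是对被擦除的帧进行隐藏(concealment),并不能真正恢复原始数据。至于"低复杂度",大概是因为它只处理 8 kHz 采样、每帧 10 ms(80 个采样点)的数据。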

头文件 lowcfe.h

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/*
============================================================================
File: lowcfe.h V.1.0-24.MAY-2005
============================================================================

UGST/ITU-T G711 Appendix I PLC MODULE

GLOBAL FUNCTION PROTOTYPES

History:
24.May.05 v1.0 First version <AT&T>
Integration in STL2005 <Cyril Guillaume & Stephane Ragot - stephane.ragot@francetelecom.com>
============================================================================
*/
#ifndef __LOWCFE_C_H__
#define __LOWCFE_C_H__

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Floating-point type used for the pitch buffer and all overlap-add
 * arithmetic. Define USEDOUBLES at compile time to select double precision.
 */
#ifdef USEDOUBLES
typedef double Float; /* likely to be bit-exact between machines */
#else
typedef float Float; /* default: single precision */
#endif

/* All lengths below are sample counts at the 8 kHz rate implied by FRAMESZ. */
#define PITCH_MIN 40 /* shortest allowed pitch period, 200 Hz */
#define PITCH_MAX 120 /* longest allowed pitch period, 66 Hz */
#define PITCHDIFF (PITCH_MAX - PITCH_MIN) /* width of the pitch search range */
#define POVERLAPMAX (PITCH_MAX >> 2) /* maximum pitch OLA window: 1/4 of the longest period */
#define HISTORYLEN (PITCH_MAX * 3 + POVERLAPMAX) /* history buffer length: 3 longest periods plus one OLA window */
#define NDEC 2 /* 2:1 decimation used by the coarse pitch search */
#define CORRLEN 160 /* 20 msec correlation length */
#define CORRBUFLEN (CORRLEN + PITCH_MAX) /* correlation buffer length */
#define CORRMINPOWER ((Float)250.) /* floor applied to the energy before normalising the correlation */
#define EOVERLAPINCR 32 /* end OLA increment per erased frame, 4ms */
#define FRAMESZ 80 /* 10 msec at 8kHz */
#define ATTENFAC ((Float).2) /* attenuation factor per 10ms frame */
#define ATTENINCR (ATTENFAC/FRAMESZ) /* attenuation per sample */

/*
 * Concealment state for one audio stream. Initialise with construct()
 * before passing it to dofe() or addtohistory().
 */
typedef struct _LowcFE_c {
int erasecnt; /* consecutive erased frames; 0 after a good frame */
int poverlap; /* overlap-add length derived from the pitch (pitch / 4) */
int poffset; /* read offset into the pitch buffer, wraps at pitchblen */
int pitch; /* pitch period estimate, in samples */
int pitchblen; /* current pitch buffer length (a multiple of pitch) */
Float *pitchbufend; /* one past the last element of pitchbuf */
Float *pitchbufstart; /* start of the active region: pitchbufend - pitchblen */
Float pitchbuf[HISTORYLEN]; /* buffer for cycles of speech */
Float lastq[POVERLAPMAX]; /* saved last quarter wavelength */
short history[HISTORYLEN]; /* history buffer, newest samples at the end */
} LowcFE_c;

/* public functions; the short buffers hold FRAMESZ samples and are overwritten with the delayed output */
void construct (LowcFE_c *); /* constructor: initialise the state before first use */
void dofe (LowcFE_c *, short *s); /* synthesize speech for erasure */
void addtohistory (LowcFE_c *, short *s); /* add a good frame to history buffer */
int get_param (LowcFE_c *) ; /* current count of consecutive erased frames */

#ifdef __cplusplus
}
#endif
#endif /* __LOWCFE_C_H__ */

源文件 lowcfe.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
/*                                                          24.May.2005 v.1.0
=============================================================================

U U GGG SSSS TTTTT
U U G S T
U U G GG SSSS T
U U G G S T
UUU GG SSS T

========================================
ITU-T - USER'S GROUP ON SOFTWARE TOOLS
========================================


=============================================================
COPYRIGHT NOTE: This source code, and all of its derivations,
is subject to the "ITU-T General Public License". Please have
it read in the distribution disk, or in the ITU-T
Recommendation G.191 on "SOFTWARE TOOLS FOR SPEECH AND AUDIO
CODING STANDARDS".
** This code has (C) Copyright by AT&T Corp. **
=============================================================


MODULE: G.711 Appendix I PLC module.

ORIGINAL BY: AT&T Corp.

FUNCTIONS:

construct: ...... LowcFE Constructor.

dofe: ........... Generate the synthetic signal.
At the beginning of an erasure determine the pitch, and extract
one pitch period from the tail of the signal. Do an OLA for 1/4
of the pitch to smooth the signal. Then repeat the extracted signal
for the length of the erasure. If the erasure continues for more than
10 msec, increase the number of periods in the pitchbuffer. At the end
of an erasure, do an OLA with the start of the first good frame.
The gain decays as the erasure gets longer.

addtohistory: ... A good frame was received and decoded.
If right after an erasure, do an overlap add with the synthetic signal.
Add the frame to history buffer.

HISTORY:

24.May.05 v1.0 Release of 1st G711 PLC module <AT&T>.
Integration of this module in STL2005 <Cyril Guillaume & Stephane Ragot - stephane.ragot@francetelecom.com>.
=============================================================================
*/

#include <math.h>
#include "lowcfe.h"

/* Forward declarations of the file-local helpers defined further down. */
static void scalespeech (LowcFE_c *, short *out); /* attenuate one synthetic frame in place */
static void getfespeech (LowcFE_c *, short *out, int sz); /* read sz samples from the pitch buffer */
static void savespeech (LowcFE_c *, short *s); /* push a frame into history, return the delayed frame */
static int findpitch (LowcFE_c *); /* estimate the pitch period from the pitch buffer */
static void overlapadd (Float * l, Float * r, Float * o, int cnt); /* cross-fade, Float samples */
static void overlapadds (short *l, short *r, short *o, int cnt); /* cross-fade, short samples */
static void overlapaddatend (LowcFE_c *, short *s, short *f, int cnt); /* fade synthetic speech into a good frame */
static void convertsf (short *f, Float * t, int cnt); /* short -> Float */
static void convertfs (Float * f, short *t, int cnt); /* Float -> short */
static void copyf (Float * f, Float * t, int cnt); /* copy Float samples */
static void copys (short *f, short *t, int cnt); /* copy short samples */
static void zeros (short *s, int cnt); /* clear short samples */

/*
 * Prepare a concealment state for first use: the history buffer is filled
 * with silence, pitchbufend is set one element past the end of pitchbuf,
 * and the erasure counter is cleared.
 */
void construct (LowcFE_c * lc) {
    int n;

    for (n = 0; n < HISTORYLEN; n++)
        lc->history[n] = 0;
    lc->pitchbufend = lc->pitchbuf + HISTORYLEN;
    lc->erasecnt = 0;
}

/*
 * Report how many consecutive frames have been concealed so far.
 * The value is 0 after construct() or after a good frame was added.
 */
int get_param (LowcFE_c * lc) {
    const int erased = lc->erasecnt;

    return erased;
}

/*
 * Read sz samples of synthetic speech from the pitch buffer, treating the
 * region [pitchbufstart, pitchbufstart + pitchblen) as circular. poffset is
 * advanced (and wrapped to 0 at pitchblen) so that the next call continues
 * where this one stopped.
 */
static void getfespeech (LowcFE_c * lc, short *out, int sz) {
    while (sz != 0) {
        /* samples left before the read position wraps around */
        const int avail = lc->pitchblen - lc->poffset;
        const int chunk = (avail > sz) ? sz : avail;

        convertfs (lc->pitchbufstart + lc->poffset, out, chunk);
        out += chunk;
        sz -= chunk;
        lc->poffset += chunk;
        if (lc->poffset == lc->pitchblen)
            lc->poffset = 0;
    }
}

/*
 * Attenuate one frame (FRAMESZ samples) of synthetic speech in place.
 * The gain starts at 1 - (erasecnt - 1) * ATTENFAC and drops by ATTENINCR
 * after every sample, so it falls by ATTENFAC across the frame.
 */
static void scalespeech (LowcFE_c * lc, short *out) {
    Float gain = (Float) 1. - (lc->erasecnt - 1) * ATTENFAC;
    short *p = out;
    short *const stop = out + FRAMESZ;

    while (p != stop) {
        *p = (short) (*p * gain);
        gain -= ATTENINCR;
        ++p;
    }
}

/*
 * First erased frame: snapshot the history into the pitch buffer, estimate
 * the pitch, build a one-period pitch buffer whose last quarter wavelength
 * is cross-faded with the quarter wavelength preceding the period, write
 * that smoothed tail back into the history, and emit one synthetic frame.
 */
static void fe_first_frame (LowcFE_c * lc, short *out) {
    Float *tail;

    convertsf (lc->history, lc->pitchbuf, HISTORYLEN);
    lc->pitch = findpitch (lc);
    lc->poverlap = lc->pitch >> 2;          /* OLA over 1/4 wavelength */
    tail = lc->pitchbufend - lc->poverlap;
    /* keep the untouched last quarter wavelength for later OLAs */
    copyf (tail, lc->lastq, lc->poverlap);
    lc->poffset = 0;
    lc->pitchblen = lc->pitch;              /* one period to start with */
    lc->pitchbufstart = lc->pitchbufend - lc->pitchblen;
    overlapadd (lc->lastq, lc->pitchbufstart - lc->poverlap, tail, lc->poverlap);
    /* mirror the smoothed tail into the history buffer */
    convertfs (tail, &lc->history[HISTORYLEN - lc->poverlap], lc->poverlap);
    getfespeech (lc, out, FRAMESZ);
}

/*
 * Second and third erased frames: lengthen the pitch buffer by one period,
 * then cross-fade the continuation of the old (shorter) buffer into the
 * output of the new one and attenuate the result.
 */
static void fe_extend_pitchbuf (LowcFE_c * lc, short *out) {
    short oldtail[POVERLAPMAX];             /* continuation of the old buffer */
    const int resume = lc->poffset;         /* read position before the peek */

    getfespeech (lc, oldtail, lc->poverlap);
    lc->poffset = resume;
    while (lc->poffset > lc->pitch)
        lc->poffset -= lc->pitch;
    lc->pitchblen += lc->pitch;             /* add a period */
    lc->pitchbufstart = lc->pitchbufend - lc->pitchblen;
    overlapadd (lc->lastq, lc->pitchbufstart - lc->poverlap,
                lc->pitchbufend - lc->poverlap, lc->poverlap);
    getfespeech (lc, out, FRAMESZ);
    overlapadds (oldtail, out, out, lc->poverlap);
    scalespeech (lc, out);
}

/*
 * Generate one frame (FRAMESZ samples) of synthetic speech for an erased
 * frame and store it in out.
 *   - erasecnt == 0:       estimate the pitch and start a 1-period buffer
 *   - erasecnt == 1 or 2:  add a period to the pitch buffer, OLA, attenuate
 *   - erasecnt  > 5:       output silence
 *   - otherwise:           keep repeating the pitch buffer, attenuated
 * Afterwards erasecnt is incremented and the frame is pushed through the
 * history buffer, so out finally holds the output delayed by POVERLAPMAX.
 */
void dofe (LowcFE_c * lc, short *out) {
    const int erased = lc->erasecnt;

    if (erased == 0) {
        fe_first_frame (lc, out);
    } else if (erased == 1 || erased == 2) {
        fe_extend_pitchbuf (lc, out);
    } else if (erased > 5) {
        zeros (out, FRAMESZ);
    } else {
        getfespeech (lc, out, FRAMESZ);
        scalespeech (lc, out);
    }
    lc->erasecnt = erased + 1;
    savespeech (lc, out);
}

/*
 * Push one frame (FRAMESZ samples) into the history buffer and hand back
 * the delayed output: on return s holds the FRAMESZ samples that start
 * POVERLAPMAX samples before the frame that was just stored.
 */
static void savespeech (LowcFE_c * lc, short *s) {
    short *hist = lc->history;
    short *newest = hist + (HISTORYLEN - FRAMESZ);
    const short *delayed = newest - POVERLAPMAX;
    int n;

    /* slide older samples toward the start (regions overlap: copy forward) */
    for (n = 0; n < HISTORYLEN - FRAMESZ; n++)
        hist[n] = hist[n + FRAMESZ];
    /* append the new frame at the end */
    for (n = 0; n < FRAMESZ; n++)
        newest[n] = s[n];
    /* return the delayed frame to the caller */
    for (n = 0; n < FRAMESZ; n++)
        s[n] = delayed[n];
}

/*
 * Accept a good (received and decoded) frame of FRAMESZ samples.
 * When it directly follows an erasure, its start is first cross-faded with
 * a continuation of the synthetic signal and the erasure counter is reset.
 * The frame is then stored in the history; on return s holds the output
 * delayed by POVERLAPMAX samples.
 */
void addtohistory (LowcFE_c * lc, short *s) {
    if (lc->erasecnt != 0) {
        short synth[FRAMESZ];
        /*
         * The overlap grows by EOVERLAPINCR for every additional erased
         * frame, capped at one full frame, to smooth longer gaps.
         */
        int span = (lc->erasecnt - 1) * EOVERLAPINCR + lc->poverlap;

        span = (span > FRAMESZ) ? FRAMESZ : span;
        getfespeech (lc, synth, span);
        overlapaddatend (lc, s, synth, span);
        lc->erasecnt = 0;
    }
    savespeech (lc, s);
}

/*
 * Cross-fade the end of an erasure into the first good frame, in place.
 *   s   - good frame; its first cnt samples are replaced by the mix
 *   f   - cnt samples of synthetic speech continuing the erasure
 *   cnt - overlap length in samples
 * The synthetic side is additionally scaled by the attenuation gain reached
 * at the current erasure count (clamped at 0); results saturate to 16 bits.
 */
static void overlapaddatend (LowcFE_c * lc, short *s, short *f, int cnt) {
    const Float step = (Float) 1. / cnt;
    Float atten = (Float) 1. - (lc->erasecnt - 1) * ATTENFAC;
    Float attenstep;
    Float wsynth, wreal;    /* weights of synthetic and real samples */
    Float mix;
    int k;

    if (atten < 0.)
        atten = (Float) 0.;
    attenstep = step * atten;
    wsynth = ((Float) 1. - step) * atten;
    wreal = step;
    for (k = 0; k < cnt; k++) {
        mix = wsynth * f[k] + wreal * s[k];
        if (mix > 32767.)
            mix = (Float) 32767.;
        else if (mix < -32768.)
            mix = (Float) - 32768.;
        s[k] = (short) mix;
        wsynth -= attenstep;
        wreal += step;
    }
}

/*
 * Linear cross-fade of two Float segments: o[i] = lw * l[i] + rw * r[i],
 * where lw ramps down from 1 - 1/cnt and rw ramps up from 1/cnt.
 * Results are clamped to the 16-bit sample range. Nothing is written
 * when cnt is 0.
 */
static void overlapadd (Float * l, Float * r, Float * o, int cnt) {
    if (cnt != 0) {
        const Float step = (Float) 1. / cnt;
        Float wl = (Float) 1. - step;
        Float wr = step;
        int k;

        for (k = 0; k < cnt; k++) {
            Float mix = wl * l[k] + wr * r[k];

            if (mix > (Float) 32767.)
                mix = (Float) 32767.;
            else if (mix < (Float) - 32768.)
                mix = (Float) - 32768.;
            o[k] = mix;
            wl -= step;
            wr += step;
        }
    }
}

/*
* Overlapp add left and right sides
*/
static void overlapadds (short *l, short *r, short *o, int cnt) {
int i;
Float incr, lw, rw, t;

if (cnt == 0)
return;
incr = (Float) 1. / cnt;
lw = (Float) 1. - incr;
rw = incr;
for (i = 0; i < cnt; i++) {
t = lw * l[i] + rw * r[i];
if (t > (Float) 32767.)
t = (Float) 32767.;
else if (t < (Float) - 32768.)
t = (Float) - 32768.;
o[i] = (short) t;
lw -= incr;
rw += incr;
}
}

/*
 * Estimate the pitch period from the speech held in the pitch buffer.
 *
 * The last CORRLEN samples (l) are correlated against earlier segments that
 * start between PITCH_MAX and PITCH_MIN samples before l; r points to the
 * earliest candidate (PITCH_MAX samples before l). Each correlation is
 * normalised by the square root of the candidate segment's energy, floored
 * at CORRMINPOWER. A coarse search over every NDEC-th lag and sample is
 * followed by a full-resolution search around the coarse winner.
 *
 * Returns the pitch period in samples, PITCH_MIN <= result <= PITCH_MAX.
 */
static int findpitch (LowcFE_c * lc) {
int i, j, k;
int bestmatch;
Float bestcorr;
Float corr; /* correlation */
Float energy; /* running energy */
Float scale; /* scale correlation by average power */
Float *rp; /* segment to match */
Float *l = lc->pitchbufend - CORRLEN;
Float *r = lc->pitchbufend - CORRBUFLEN;

/* coarse search: offset 0 is evaluated first to seed the energy and best values */
rp = r;
energy = (Float) 0.;
corr = (Float) 0.;
for (i = 0; i < CORRLEN; i += NDEC) {
energy += rp[i] * rp[i];
corr += rp[i] * l[i];
}
scale = energy;
if (scale < CORRMINPOWER)
scale = CORRMINPOWER;
corr = corr / (Float) sqrt (scale);
bestcorr = corr;
bestmatch = 0;
for (j = NDEC; j <= PITCHDIFF; j += NDEC) {
/* slide the energy window: drop the sample leaving, add the one entering */
energy -= rp[0] * rp[0];
energy += rp[CORRLEN] * rp[CORRLEN];
rp += NDEC;
corr = 0.f;
for (i = 0; i < CORRLEN; i += NDEC)
corr += rp[i] * l[i];
scale = energy;
if (scale < CORRMINPOWER)
scale = CORRMINPOWER;
corr /= (Float) sqrt (scale);
/* >= : on a tie the later offset wins */
if (corr >= bestcorr) {
bestcorr = corr;
bestmatch = j;
}
}
/* fine search: every offset in [bestmatch - (NDEC-1), bestmatch + (NDEC-1)], clamped to [0, PITCHDIFF] */
j = bestmatch - (NDEC - 1);
if (j < 0)
j = 0;
k = bestmatch + (NDEC - 1);
if (k > PITCHDIFF)
k = PITCHDIFF;
rp = &r[j];
energy = 0.f;
corr = 0.f;
for (i = 0; i < CORRLEN; i++) {
energy += rp[i] * rp[i];
corr += rp[i] * l[i];
}
scale = energy;
if (scale < CORRMINPOWER)
scale = CORRMINPOWER;
corr = corr / (Float) sqrt (scale);
bestcorr = corr;
bestmatch = j;
for (j++; j <= k; j++) {
/* slide the energy window by one sample */
energy -= rp[0] * rp[0];
energy += rp[CORRLEN] * rp[CORRLEN];
rp++;
corr = 0.f;
for (i = 0; i < CORRLEN; i++)
corr += rp[i] * l[i];
scale = energy;
if (scale < CORRMINPOWER)
scale = CORRMINPOWER;
corr = corr / (Float) sqrt (scale);
/* strict > : on a tie the earlier offset is kept */
if (corr > bestcorr) {
bestcorr = corr;
bestmatch = j;
}
}
/* offset 0 is PITCH_MAX samples before l, so a larger offset means a shorter period */
return PITCH_MAX - bestmatch;
}

/* Convert cnt samples from short (f) to Float (t); no-op when cnt <= 0. */
static void convertsf (short *f, Float * t, int cnt) {
    for (; cnt > 0; --cnt)
        *t++ = (Float) *f++;
}

/*
 * Convert cnt samples from Float (f) to short (t) with a plain cast
 * (no rounding or saturation is applied here); no-op when cnt <= 0.
 */
static void convertfs (Float * f, short *t, int cnt) {
    for (; cnt > 0; --cnt)
        *t++ = (short) *f++;
}

/* Copy cnt Float samples from f to t, lowest index first; no-op when cnt <= 0. */
static void copyf (Float * f, Float * t, int cnt) {
    for (; cnt > 0; --cnt)
        *t++ = *f++;
}

/*
 * Copy cnt short samples from f to t, lowest index first. Because the copy
 * runs forward, it is safe for overlapping buffers when t lies below f.
 * No-op when cnt <= 0.
 */
static void copys (short *f, short *t, int cnt) {
    for (; cnt > 0; --cnt)
        *t++ = *f++;
}

/* Set the first cnt samples of s to zero; no-op when cnt <= 0. */
static void zeros (short *s, int cnt) {
    for (; cnt > 0; --cnt)
        *s++ = 0;
}

主要接口就两个

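  • addtohistory: 收到正常帧时调用,传入解码后的一帧 PCM(80 个采样点);返回时缓冲区内容被替换为延迟 POVERLAPMAX 个采样点后的输出
  • dofe: 发生丢包时调用,在缓冲区中输出一帧合成的 PCM,用来代替丢失的帧(同样带有 POVERLAPMAX 个采样点的延迟)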

用例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "lowcfe.h"

/*
 * Demo driver for the concealment module.
 *
 * Arguments (after the program name):
 *   argv[0]  input file of raw `short` samples, read FRAMESZ at a time
 *   argv[1]  output file showing the simulated loss (erased frames zeroed)
 *   argv[2]  output file with the concealed signal
 *
 * Loss pattern: frame 1 is good; after that the stream alternates between
 * 4 erased frames and 2 good frames. Erased frames are synthesized with
 * dofe(), good frames are fed to addtohistory().
 *
 * Returns 0 on success; exits with EXIT_FAILURE on a usage or open error.
 */
int main(int argc, char *argv[])
{
    int nframes = 0;            /* processed frame count */
    int nerased = 0;            /* erased frame count */
    FILE *fi;                   /* input file */
    FILE *fo_lost;              /* output file with the loss left in */
    FILE *fo;                   /* output file with the loss concealed */
    LowcFE_c lc;                /* PLC simulation data */
    short in[FRAMESZ];          /* i/o buffer */
    short in_tmp[FRAMESZ];      /* untouched copy of the first frame */
    int nCount = 1;             /* run counter: even means "frames are lost" */
    int ntmp = 2;               /* length of the run that starts next toggle */
    int nframestmp = 2;         /* frame number at which the next run starts */

    /*
     * No local variable may be named `dofe` here: it would hide the dofe()
     * function and the call below would not compile.
     */
    argc--;
    argv++;
    if (argc != 3) {
        fprintf(stderr, "argc:%d", argc);
        exit(EXIT_FAILURE);
    }
    if ((fi = fopen(argv[0], "rb")) == NULL) {      /* input file */
        fprintf(stderr, "Can't open input file: %s", argv[0]);
        exit(EXIT_FAILURE);
    }
    if ((fo_lost = fopen(argv[1], "wb")) == NULL) { /* output lost file */
        fprintf(stderr, "Can't open output file: %s", argv[1]);
        exit(EXIT_FAILURE);
    }
    if ((fo = fopen(argv[2], "wb")) == NULL) {      /* output file */
        fprintf(stderr, "Can't open output file: %s", argv[2]);
        exit(EXIT_FAILURE);
    }
    construct(&lc);

    while (fread(in, sizeof(short), FRAMESZ, fi) == FRAMESZ) {
        nframes++;
        /*
         * Toggle between "good" and "lost" runs. nframestmp only ever takes
         * even values (2, 6, 8, 12, ...), so no separate parity test is
         * needed.
         */
        if (nframes == nframestmp) {
            nCount++;
            ntmp = (ntmp == 2) ? 4 : 2;
            nframestmp += ntmp;
        }
        if (nCount % 2 == 0) {
            nerased++;          /* frame is erased */
            memset(in, 0, FRAMESZ * sizeof(short));
            fwrite(in, sizeof(short), FRAMESZ, fo_lost);
            dofe(&lc, in);
        } else {
            if (nframes != 1)
                fwrite(in, sizeof(short), FRAMESZ, fo_lost);
            else
                memcpy(in_tmp, in, sizeof(short) * FRAMESZ);
            addtohistory(&lc, in);  /* frame is not erased */
        }
        /*
         * The concealment algorithm delays the signal by
         * POVERLAPMAX samples. Remove the delay so the output
         * file is time-aligned with the input file.
         */
        if (nframes == 1) {
            /*
             * NOTE(review): besides dropping the POVERLAPMAX-sample delay,
             * the first iteration also writes the raw first frame (in_tmp)
             * to fo; kept as in the original listing - confirm this is the
             * intended alignment.
             */
            fwrite(&in[POVERLAPMAX], sizeof(short), FRAMESZ - POVERLAPMAX, fo);
            fwrite(in_tmp, sizeof(short), FRAMESZ, fo);
            fwrite(&in[POVERLAPMAX], sizeof(short), FRAMESZ - POVERLAPMAX, fo_lost);
        } else {
            fwrite(in, sizeof(short), FRAMESZ, fo);
        }
    }
    /*
     * the following code outputs the delayed speech in the history buffer
     * so the length of the output file is an integral multiple of
     * the frame size.
     */
    if (nframes) {
        memset(in, 0, sizeof in);
        addtohistory(&lc, in);
        fwrite(in, sizeof(short), POVERLAPMAX, fo);
        printf("%d of %d frames concealed = %.2f%%\n", nerased, nframes, (double)nerased / nframes * 100.);
    }
    /* cleanup */
    fclose(fo);
    fclose(fo_lost);
    fclose(fi);
    return 0;
}