fun_ofdm  1.0
802.11a Physical Layer for USRP
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Macros Pages
fun::viterbi Class Reference

The viterbi class. More...

#include <viterbi.h>

Public Member Functions

void conv_decode (unsigned char *symbols, unsigned char *data, int data_bits)
 Decodes convolutionally encoded data using the viterbi algorithm. More...
 
void conv_encode (unsigned char *data, unsigned char *symbols, int data_bits)
 Convolutionally encodes data. More...
 

Private Member Functions

COMPUTETYPE Branchtab[NUMSTATES/2*RATE] __attribute__ ((aligned(16)))
 
void viterbi_chainback (struct v *vp, unsigned char *data, unsigned int nbits, unsigned int endstate)
 
void FULL_SPIRAL (int nbits, unsigned char *Y, unsigned char *X, const unsigned char *syms, unsigned char *dec, unsigned char *Branchtab)
 viterbi::FULL_SPIRAL More...
 
struct v * viterbi_alloc (int len)
 Create a new instance of a Viterbi decoder. More...
 
void viterbi_free (struct v *vp)
 Destroy instance of a Viterbi decoder. More...
 
void viterbi_init (struct v *vp, int starting_state)
 Initialize decoder for start of new frame. More...
 
void viterbi_decode (struct v *vp, const COMPUTETYPE *symbols, unsigned char *data, int nbits)
 Decode one frame worth of data. More...
 
void viterbi_update_blk_SPIRAL (struct v *vp, const COMPUTETYPE *syms, int nbits)
 set the viterbi decoder to use a specific implementation More...
 

Detailed Description

The viterbi class.

Definition at line 64 of file viterbi.h.

Member Function Documentation

COMPUTETYPE fun::viterbi::Branchtab [NUMSTATES/2*RATE] __attribute__ ((aligned(16)))
private
void fun::viterbi::conv_decode ( unsigned char *  symbols,
unsigned char *  data,
int  data_bits 
)

Decodes convolutionally encoded data using the viterbi algorithm.

Parameters
symbols: Coded symbols that need to be decoded.
data: Output data that has been decoded.
data_bits: Number of data bits that should be left after decoding.

Main decode function.

Definition at line 31 of file viterbi.cpp.

References viterbi_alloc(), viterbi_decode(), viterbi_free(), and viterbi_init().

Referenced by fun::ppdu::decode_data(), and fun::ppdu::decode_header().

32  {
33  struct v * vp = viterbi_alloc(data_bits);
34  viterbi_init(vp, 0);
35  viterbi_decode(vp, &symbols[0], &data[0], data_bits);
36  viterbi_free(vp);
37  }
void fun::viterbi::conv_encode ( unsigned char *  data,
unsigned char *  symbols,
int  data_bits 
)

Convolutionally encodes data.

Parameters
data: The data to be coded.
symbols: The coded output symbols.
data_bits: The number of bits in the data input.

Definition at line 39 of file viterbi.cpp.

References K, fun::parity(), POLYS, and RATE.

Referenced by fun::ppdu::encode_data(), and fun::ppdu::encoder_header().

40  {
41  int symbol_count = RATE * (data_bits + 6);
42  memset(symbols, 0, symbol_count);
43 
44  int polys[RATE] = POLYS;
45  int sr = 0;
46 
47  int index = 0;
48  for(int i = 0; i < data_bits+(K-1); i++) {
49 
50  int b = data[i/8];
51  int j = i % 8;
52  int bit = (b >> (7 - j)) & 1;
53 
54  sr = (sr << 1) | bit;
55  for(int k = 0; k < RATE; k++)
56  {
57  int m = sr & polys[k];
58  int par = parity(sr & polys[k]);
59  symbols[index++] = par;
60  }
61  }
62  }
void fun::viterbi::FULL_SPIRAL ( int  nbits,
unsigned char *  Y,
unsigned char *  X,
const unsigned char *  syms,
unsigned char *  dec,
unsigned char *  Branchtab 
)
private

viterbi::FULL_SPIRAL

Parameters
nbits
Y
X
syms
dec
Branchtab

Definition at line 208 of file viterbi.cpp.

Referenced by viterbi_update_blk_SPIRAL().

208  {
209  for(int i9 = 0; i9 <= (nbits/2-1); i9++) {
210  unsigned char a75, a81;
211  int a73, a92;
212  short int s20, s21, s26, s27;
213  const unsigned char *a74, *a80, *b6;
214  short int *a110, *a111, *a91, *a93, *a94;
215  __m128i *a102, *a112, *a113, *a71, *a72, *a77, *a83
216  , *a95, *a96, *a97, *a98, *a99;
217  __m128i a105, a106, a86, a87;
218  __m128i a100, a101, a103, a104, a107, a108, a109
219  , a76, a78, a79, a82, a84, a85, a88, a89
220  , a90, d10, d11, d12, d9, m23, m24, m25
221  , m26, m27, m28, m29, m30, s18, s19, s22
222  , s23, s24, s25, s28, s29, t13, t14, t15
223  , t16, t17, t18;
224  a71 = ((__m128i *) X);
225  s18 = *(a71);
226  a72 = (a71 + 2);
227  s19 = *(a72);
228  a73 = (4 * i9);
229  a74 = (syms + a73);
230  a75 = *(a74);
231  a76 = _mm_set1_epi8(a75);
232  a77 = ((__m128i *) Branchtab);
233  a78 = *(a77);
234  a79 = _mm_xor_si128(a76, a78);
235  b6 = (a73 + syms);
236  a80 = (b6 + 1);
237  a81 = *(a80);
238  a82 = _mm_set1_epi8(a81);
239  a83 = (a77 + 2);
240  a84 = *(a83);
241  a85 = _mm_xor_si128(a82, a84);
242  t13 = _mm_avg_epu8(a79,a85);
243  a86 = ((__m128i ) t13);
244  a87 = _mm_srli_epi16(a86, 2);
245  a88 = ((__m128i ) a87);
246  t14 = _mm_and_si128(a88, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
247  , 63, 63, 63, 63, 63, 63, 63, 63
248  , 63));
249  t15 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
250  , 63, 63, 63, 63, 63, 63, 63, 63
251  , 63), t14);
252  m23 = _mm_adds_epu8(s18, t14);
253  m24 = _mm_adds_epu8(s19, t15);
254  m25 = _mm_adds_epu8(s18, t15);
255  m26 = _mm_adds_epu8(s19, t14);
256  a89 = _mm_min_epu8(m24, m23);
257  d9 = _mm_cmpeq_epi8(a89, m24);
258  a90 = _mm_min_epu8(m26, m25);
259  d10 = _mm_cmpeq_epi8(a90, m26);
260  s20 = _mm_movemask_epi8(_mm_unpacklo_epi8(d9,d10));
261  a91 = ((short int *) dec);
262  a92 = (8 * i9);
263  a93 = (a91 + a92);
264  *(a93) = s20;
265  s21 = _mm_movemask_epi8(_mm_unpackhi_epi8(d9,d10));
266  a94 = (a93 + 1);
267  *(a94) = s21;
268  s22 = _mm_unpacklo_epi8(a89, a90);
269  s23 = _mm_unpackhi_epi8(a89, a90);
270  a95 = ((__m128i *) Y);
271  *(a95) = s22;
272  a96 = (a95 + 1);
273  *(a96) = s23;
274  a97 = (a71 + 1);
275  s24 = *(a97);
276  a98 = (a71 + 3);
277  s25 = *(a98);
278  a99 = (a77 + 1);
279  a100 = *(a99);
280  a101 = _mm_xor_si128(a76, a100);
281  a102 = (a77 + 3);
282  a103 = *(a102);
283  a104 = _mm_xor_si128(a82, a103);
284  t16 = _mm_avg_epu8(a101,a104);
285  a105 = ((__m128i ) t16);
286  a106 = _mm_srli_epi16(a105, 2);
287  a107 = ((__m128i ) a106);
288  t17 = _mm_and_si128(a107, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
289  , 63, 63, 63, 63, 63, 63, 63, 63
290  , 63));
291  t18 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
292  , 63, 63, 63, 63, 63, 63, 63, 63
293  , 63), t17);
294  m27 = _mm_adds_epu8(s24, t17);
295  m28 = _mm_adds_epu8(s25, t18);
296  m29 = _mm_adds_epu8(s24, t18);
297  m30 = _mm_adds_epu8(s25, t17);
298  a108 = _mm_min_epu8(m28, m27);
299  d11 = _mm_cmpeq_epi8(a108, m28);
300  a109 = _mm_min_epu8(m30, m29);
301  d12 = _mm_cmpeq_epi8(a109, m30);
302  s26 = _mm_movemask_epi8(_mm_unpacklo_epi8(d11,d12));
303  a110 = (a93 + 2);
304  *(a110) = s26;
305  s27 = _mm_movemask_epi8(_mm_unpackhi_epi8(d11,d12));
306  a111 = (a93 + 3);
307  *(a111) = s27;
308  s28 = _mm_unpacklo_epi8(a108, a109);
309  s29 = _mm_unpackhi_epi8(a108, a109);
310  a112 = (a95 + 2);
311  *(a112) = s28;
312  a113 = (a95 + 3);
313  *(a113) = s29;
314  if ((((unsigned char *) Y)[0]>210)) {
315  __m128i m5, m6;
316  m5 = ((__m128i *) Y)[0];
317  m5 = _mm_min_epu8(m5, ((__m128i *) Y)[1]);
318  m5 = _mm_min_epu8(m5, ((__m128i *) Y)[2]);
319  m5 = _mm_min_epu8(m5, ((__m128i *) Y)[3]);
320  __m128i m7;
321  m7 = _mm_min_epu8(_mm_srli_si128(m5, 8), m5);
322  m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 32)), ((__m128i ) m7)));
323  m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 16)), ((__m128i ) m7)));
324  m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 8)), ((__m128i ) m7)));
325  m7 = _mm_unpacklo_epi8(m7, m7);
326  m7 = _mm_shufflelo_epi16(m7, _MM_SHUFFLE(0, 0, 0, 0));
327  m6 = _mm_unpacklo_epi64(m7, m7);
328  ((__m128i *) Y)[0] = _mm_subs_epu8(((__m128i *) Y)[0], m6);
329  ((__m128i *) Y)[1] = _mm_subs_epu8(((__m128i *) Y)[1], m6);
330  ((__m128i *) Y)[2] = _mm_subs_epu8(((__m128i *) Y)[2], m6);
331  ((__m128i *) Y)[3] = _mm_subs_epu8(((__m128i *) Y)[3], m6);
332  }
333  unsigned char a188, a194;
334  int a186, a205;
335  short int s48, s49, s54, s55;
336  const unsigned char *a187, *a193, *b15;
337  short int *a204, *a206, *a207, *a223, *a224, *b16;
338  __m128i *a184, *a185, *a190, *a196, *a208, *a209, *a210
339  , *a211, *a212, *a215, *a225, *a226;
340  __m128i a199, a200, a218, a219;
341  __m128i a189, a191, a192, a195, a197, a198, a201
342  , a202, a203, a213, a214, a216, a217, a220, a221
343  , a222, d17, d18, d19, d20, m39, m40, m41
344  , m42, m43, m44, m45, m46, s46, s47, s50
345  , s51, s52, s53, s56, s57, t25, t26, t27
346  , t28, t29, t30;
347  a184 = ((__m128i *) Y);
348  s46 = *(a184);
349  a185 = (a184 + 2);
350  s47 = *(a185);
351  a186 = (4 * i9);
352  b15 = (a186 + syms);
353  a187 = (b15 + 2);
354  a188 = *(a187);
355  a189 = _mm_set1_epi8(a188);
356  a190 = ((__m128i *) Branchtab);
357  a191 = *(a190);
358  a192 = _mm_xor_si128(a189, a191);
359  a193 = (b15 + 3);
360  a194 = *(a193);
361  a195 = _mm_set1_epi8(a194);
362  a196 = (a190 + 2);
363  a197 = *(a196);
364  a198 = _mm_xor_si128(a195, a197);
365  t25 = _mm_avg_epu8(a192,a198);
366  a199 = ((__m128i ) t25);
367  a200 = _mm_srli_epi16(a199, 2);
368  a201 = ((__m128i ) a200);
369  t26 = _mm_and_si128(a201, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
370  , 63, 63, 63, 63, 63, 63, 63, 63
371  , 63));
372  t27 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
373  , 63, 63, 63, 63, 63, 63, 63, 63
374  , 63), t26);
375  m39 = _mm_adds_epu8(s46, t26);
376  m40 = _mm_adds_epu8(s47, t27);
377  m41 = _mm_adds_epu8(s46, t27);
378  m42 = _mm_adds_epu8(s47, t26);
379  a202 = _mm_min_epu8(m40, m39);
380  d17 = _mm_cmpeq_epi8(a202, m40);
381  a203 = _mm_min_epu8(m42, m41);
382  d18 = _mm_cmpeq_epi8(a203, m42);
383  s48 = _mm_movemask_epi8(_mm_unpacklo_epi8(d17,d18));
384  a204 = ((short int *) dec);
385  a205 = (8 * i9);
386  b16 = (a204 + a205);
387  a206 = (b16 + 4);
388  *(a206) = s48;
389  s49 = _mm_movemask_epi8(_mm_unpackhi_epi8(d17,d18));
390  a207 = (b16 + 5);
391  *(a207) = s49;
392  s50 = _mm_unpacklo_epi8(a202, a203);
393  s51 = _mm_unpackhi_epi8(a202, a203);
394  a208 = ((__m128i *) X);
395  *(a208) = s50;
396  a209 = (a208 + 1);
397  *(a209) = s51;
398  a210 = (a184 + 1);
399  s52 = *(a210);
400  a211 = (a184 + 3);
401  s53 = *(a211);
402  a212 = (a190 + 1);
403  a213 = *(a212);
404  a214 = _mm_xor_si128(a189, a213);
405  a215 = (a190 + 3);
406  a216 = *(a215);
407  a217 = _mm_xor_si128(a195, a216);
408  t28 = _mm_avg_epu8(a214,a217);
409  a218 = ((__m128i ) t28);
410  a219 = _mm_srli_epi16(a218, 2);
411  a220 = ((__m128i ) a219);
412  t29 = _mm_and_si128(a220, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
413  , 63, 63, 63, 63, 63, 63, 63, 63
414  , 63));
415  t30 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
416  , 63, 63, 63, 63, 63, 63, 63, 63
417  , 63), t29);
418  m43 = _mm_adds_epu8(s52, t29);
419  m44 = _mm_adds_epu8(s53, t30);
420  m45 = _mm_adds_epu8(s52, t30);
421  m46 = _mm_adds_epu8(s53, t29);
422  a221 = _mm_min_epu8(m44, m43);
423  d19 = _mm_cmpeq_epi8(a221, m44);
424  a222 = _mm_min_epu8(m46, m45);
425  d20 = _mm_cmpeq_epi8(a222, m46);
426  s54 = _mm_movemask_epi8(_mm_unpacklo_epi8(d19,d20));
427  a223 = (b16 + 6);
428  *(a223) = s54;
429  s55 = _mm_movemask_epi8(_mm_unpackhi_epi8(d19,d20));
430  a224 = (b16 + 7);
431  *(a224) = s55;
432  s56 = _mm_unpacklo_epi8(a221, a222);
433  s57 = _mm_unpackhi_epi8(a221, a222);
434  a225 = (a208 + 2);
435  *(a225) = s56;
436  a226 = (a208 + 3);
437  *(a226) = s57;
438  if ((((unsigned char *) X)[0]>210)) {
439  __m128i m12, m13;
440  m12 = ((__m128i *) X)[0];
441  m12 = _mm_min_epu8(m12, ((__m128i *) X)[1]);
442  m12 = _mm_min_epu8(m12, ((__m128i *) X)[2]);
443  m12 = _mm_min_epu8(m12, ((__m128i *) X)[3]);
444  __m128i m14;
445  m14 = _mm_min_epu8(_mm_srli_si128(m12, 8), m12);
446  m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 32)), ((__m128i ) m14)));
447  m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 16)), ((__m128i ) m14)));
448  m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 8)), ((__m128i ) m14)));
449  m14 = _mm_unpacklo_epi8(m14, m14);
450  m14 = _mm_shufflelo_epi16(m14, _MM_SHUFFLE(0, 0, 0, 0));
451  m13 = _mm_unpacklo_epi64(m14, m14);
452  ((__m128i *) X)[0] = _mm_subs_epu8(((__m128i *) X)[0], m13);
453  ((__m128i *) X)[1] = _mm_subs_epu8(((__m128i *) X)[1], m13);
454  ((__m128i *) X)[2] = _mm_subs_epu8(((__m128i *) X)[2], m13);
455  ((__m128i *) X)[3] = _mm_subs_epu8(((__m128i *) X)[3], m13);
456  }
457  }
458  /* skip */
459  }
struct v * fun::viterbi::viterbi_alloc ( int  len)
private

Create a new instance of a Viterbi decoder.

Parameters
len: Frame length in data bits (FRAMEBITS, unpadded).

Definition at line 81 of file viterbi.cpp.

References fun::v::decisions, K, NUMSTATES, fun::parity(), POLYS, RATE, and viterbi_init().

Referenced by conv_decode().

81  {
82  struct v *vp;
83  static int Init = 0;
84 
85  int state, i;
86  int polys[RATE] = POLYS;
87  for (state=0;state < NUMSTATES/2;state++) {
88  for (i=0; i<RATE; i++) {
89  Branchtab[i*NUMSTATES/2+state] = (polys[i] < 0) ^ parity((2*state) & abs(polys[i])) ? 255 : 0;
90  }
91  }
92  Init++;
93 
94  if (posix_memalign((void**)&vp, 16,sizeof(struct v)))
95  return NULL;
96 
97  // NOTE: a frame-worth of decisions!
98  if (posix_memalign((void**)&vp->decisions, 16,(len+(K-1))*sizeof(decision_t))) {
99  free(vp);
100  return NULL;
101  }
102  viterbi_init(vp, 0);
103 
104  return vp;
105  }
void fun::viterbi::viterbi_chainback ( struct v *  vp,
unsigned char *  data,
unsigned int  nbits,
unsigned int  endstate 
)
private

Definition at line 108 of file viterbi.cpp.

References ADDSHIFT, fun::v::decisions, K, NUMSTATES, SUBSHIFT, and fun::decision_t::w.

Referenced by viterbi_decode().

112  {
113 
114  decision_t *d = vp->decisions;
115 
116  /* ADDSHIFT and SUBSHIFT make sure that the thing returned is a byte. */
117  #if (K-1<8)
118  #define ADDSHIFT (8-(K-1))
119  #define SUBSHIFT 0
120  #elif (K-1>8)
121  #define ADDSHIFT 0
122  #define SUBSHIFT ((K-1)-8)
123  #else
124  #define ADDSHIFT 0
125  #define SUBSHIFT 0
126  #endif
127 
128  /* Make room beyond the end of the encoder register so we can
129  * accumulate a full byte of decoded data
130  */
131  endstate = (endstate % NUMSTATES) << ADDSHIFT;
132 
133  /* The store into data[] only needs to be done every 8 bits.
134  * But this avoids a conditional branch, and the writes will
135  * combine in the cache anyway
136  */
137  d += (K-1); /* Look past tail */
138  while (nbits-- != 0) {
139  int k = (d[nbits].w[(endstate >> ADDSHIFT)/32] >> ((endstate >> ADDSHIFT)%32)) & 1;
140  endstate = (endstate >> 1) | (k << (K-2+ADDSHIFT));
141  data[nbits >> 3] = endstate >> SUBSHIFT;
142  }
143 
144  #undef ADDSHIRT
145  #undef SUBSHIFT
146  }
void fun::viterbi::viterbi_decode ( struct v *  vp,
const COMPUTETYPE *  symbols,
unsigned char *  data,
int  nbits 
)
private

Decode one frame worth of data.

viterbi::viterbi_decode

Parameters
vp: Pointer to the v struct used to store parameters and help with decoding.
symbols: Input symbols to be decoded.
data: Output data that has been decoded.

NOTE: nbits has to match what was passed to viterbi_alloc(...). FIXME: store nbits in struct v?

Parameters
vp
symbols
data
nbits

Definition at line 166 of file viterbi.cpp.

References K, viterbi_chainback(), viterbi_init(), and viterbi_update_blk_SPIRAL().

Referenced by conv_decode().

166  {
167  // vp = viterbi decoder
168  // data = decoded
169  // symbols = signal
170 
171  /* Decode it and make sure we get the right answer */
172  /* Initialize Viterbi decoder */
173  viterbi_init(vp, 0);
174 
175  /* Decode block */
176  //vp->update_blk(vp, symbols, nbits+(K-1));
177  viterbi_update_blk_SPIRAL(vp, symbols, nbits + (K-1));
178 
179  /* Do Viterbi chainback */
180  viterbi_chainback(vp, data, nbits, 0);
181  }
void fun::viterbi::viterbi_free ( struct v *  vp)
private

Destroy instance of a Viterbi decoder.

Delete instance of a Viterbi decoder.

Parameters
vp: Pointer to the v struct to free.
vp

Definition at line 152 of file viterbi.cpp.

References fun::v::decisions.

Referenced by conv_decode().

152  {
153  if (vp != NULL) {
154  free(vp->decisions);
155  free(vp);
156  }
157  }
void fun::viterbi::viterbi_init ( struct v *  vp,
int  starting_state 
)
private

Initialize decoder for start of new frame.

viterbi::viterbi_init

Parameters
vp: Pointer to the v struct to initialize for decoding.
starting_state: Initial state for the Viterbi decoder.
vp
starting_state

Definition at line 71 of file viterbi.cpp.

References fun::v::new_metrics, NUMSTATES, fun::v::old_metrics, and fun::metric_t::t.

Referenced by conv_decode(), viterbi_alloc(), and viterbi_decode().

71  {
72  for(int i=0; i < NUMSTATES; i++)
73  vp->metrics1.t[i] = 63;
74 
75  vp->old_metrics = &vp->metrics1;
76  vp->new_metrics = &vp->metrics2;
77  vp->old_metrics->t[starting_state & (NUMSTATES-1)] = 0; /* Bias known start state */
78  }
void fun::viterbi::viterbi_update_blk_SPIRAL ( struct v *  vp,
const COMPUTETYPE *  syms,
int  nbits 
)
private

set the viterbi decoder to use a specific implementation

viterbi::viterbi_update_blk_SPIRAL

Parameters
vp
syms
nbits

Definition at line 190 of file viterbi.cpp.

References fun::v::decisions, FULL_SPIRAL(), fun::v::new_metrics, fun::v::old_metrics, fun::decision_t::t, and fun::metric_t::t.

Referenced by viterbi_decode().

190  {
191  decision_t *d = (decision_t *)vp->decisions;
192 
193  for (int s = 0; s < nbits; s++)
194  memset(d+s, 0, sizeof(decision_t));
195 
196  FULL_SPIRAL(nbits, vp->new_metrics->t, vp->old_metrics->t, syms, d->t, Branchtab);
197  }

The documentation for this class was generated from the following files: