Akso - Audio DSP Platform

graindelay

$\text{[math]}$

granular echo line

Author: Johannes Taelman

License: BSD

Github: jt/granular/graindelay.axo

Inlets

frac32.bipolar delay

frac32.bipolar spread

frac32buffer.bipolar in

Outlets

frac32buffer l

frac32buffer r

Parameters

frac32.u.map delay

frac32.u.map spread

Attributes

combo grains

combo grainlength

combo window

combo size

Declaration

static const uint32_t LENGTHPOW = (attr_size);
static const uint32_t LENGTH = (1 << attr_size);
static const uint32_t LENGTHMASK = ((1 << attr_size) - 1);
int16_t *array;
uint32_t writepos;

static const int ngrains = attr_grains;
static const int attenuate_shift =
    attr_grains == 128 ? 4 : attr_grains == 64 ? 3 : attr_grains == 32 ? 2 : 1;
static const int grainlength = attr_grainlength;
// even distribution of grain phases
static const int incr1 = (1ull << 32) / ngrains;
static const int incr2 = ((1ull << 32) / (BUFSIZE * 1024)) *
                         ((BUFSIZE * BUFSIZE * 1024 /*ngrains*/) / grainlength);
static const int iNewGrainsPerFrame = (ngrains * BUFSIZE) / grainlength;
// to avoid division by zero warnings in inactive code...
static const int iNewGrainsPerFrame1 =
    iNewGrainsPerFrame > 0 ? iNewGrainsPerFrame : 1;

int32_t *offsets[ngrains];
int32_t prev_window[ngrains];
int32_t phase = 0;

int32_t save_FMC_SDCR1;

__attribute__((always_inline)) __STATIC_INLINE int32_t _SMLAWT(int32_t op1,
                                                               int32_t op2,
                                                               int32_t op3) {
  int32_t result;
  __ASM volatile("smlawt %0, %1, %2, %3"
                 : "=r"(result)
                 : "r"(op1), "r"(op2), "r"(op3));
  return (result);
}

__attribute__((always_inline)) __STATIC_INLINE int32_t _SMLAWB(int32_t op1,
                                                               int32_t op2,
                                                               int32_t op3) {
  int32_t result;
  __ASM volatile("smlawb %0, %1, %2, %3"
                 : "=r"(result)
                 : "r"(op1), "r"(op2), "r"(op3));
  return (result);
}

void setupDMACopy(int nwords) {
  DMA2D->IFCR |= DMA2D_IFSR_CTEIF;
  DMA2D->IFCR |= DMA2D_IFSR_CTCIF;
  DMA2D->CR = 0;
  DMA2D->FGPFCCR = 0;
  DMA2D->OPFCCR = 0;
  DMA2D->FGOR = 0;
  DMA2D->OOR = 0;
  DMA2D->NLR = 1 + (nwords << 16);
}

void initiateDMACopy(volatile int32_t *pDst, volatile int32_t *pSrc) {
  DMA2D->FGMAR = (uint32_t)pSrc;
  DMA2D->OMAR = (uint32_t)pDst;
  DMA2D->CR |= DMA2D_CR_START;
}

void waitDMACopyComplete() {
  while (DMA2D->CR & DMA2D_CR_START) {
  }
}

int window_triangle(int phase) {
  int32_t x = phase;
  x = x < 0 ? ~x : x;
  return x >> 3;
}

int window_smoothstep(int phase) {
  int32_t x = phase;
  x = x < 0 ? ~x : x;
  uint32_t _3_minus_2x = 0xC0000000 - x;
  uint32_t x_squared = ___SMMUL(x, x);
  return (_3_minus_2x * (uint64_t)x_squared) >> 32;
}

int window_smoothstep_ext(int phase) {
  int32_t x = phase;
  x = x < 0 ? ~x : x;
  x = __SSAT(x, 31) << 1;
  uint32_t _3_minus_2x = 0xC0000000 - x;
  uint32_t x_squared = ___SMMUL(x, x);
  return (_3_minus_2x * (uint64_t)x_squared) >> 32;
}

int window_hanning_ext(int phase) {
  int32_t x = phase;
  x = x < 0 ? ~x : x;
  x = __SSAT(x, 31) << 1;
  int32_t c = sin_q31(x - 0x40000000);
  return 0x08000000 + (c >> 4);
}

int window_hanning(int phase) {
  int32_t x = phase;
  int32_t c = sin_q31(x - 0x40000000);
  return 0x08000000 + (c >> 4);
}

int window(int phase) { return window_attr_window(phase); }

int32_t *getNewOffset(int delay, int spread) {
  int32_t tpdf = (GenerateRandomNumber() >> (32 - LENGTHPOW)) +
                 (GenerateRandomNumber() >> (32 - LENGTHPOW));
  int off = 2 * __SMMLA(spread, tpdf, delay);
  int lim = LENGTH - grainlength;
  if (off >= lim) {
    off = lim - (off - lim);
  }
  if (off < 0) {
    off = -off;
  }
  return (int32_t *)&array[(writepos - off) & LENGTHMASK];
}

void simd16_env(int32_t *outp, int32_t a_new, int32_t a_prev,
                volatile int32_t *src) {
  int32_t ainc = (a_new - a_prev) >> 4;
  int32_t a = a_prev;
  int i = BUFSIZE / 2;
  while (i--) {
    // registers:
    // 	*src
    // 	s_s16_s16
    // 	a
    // 	ainc
    // 	i
    //   *inp
    // 	x1
    // 	x2
    int32_t s_s16_s16 = *src++;
    int32_t *inp = outp;
    int32_t x1 = *inp++;
    int32_t x2 = *inp;
    x1 = _SMLAWB(a, s_s16_s16, x1);
    a += ainc;
    x2 = _SMLAWT(a, s_s16_s16, x2);
    a += ainc;
    *outp++ = x1;
    *outp++ = x2;
  }
}

Init

int i;
static int16_t _array[attr_poly][LENGTH * 2] __attribute__((section(".sdram")));
array = &_array[parent->polyIndex][0];
writepos = 0;
for (i = 0; i < LENGTH * 2; i++)
  array[i] = 0;

// initialize DMA2D engine
RCC->AHB1ENR |= RCC_AHB1ENR_DMA2DEN;
RCC->AHB1RSTR |= RCC_AHB1RSTR_DMA2DRST;
RCC->AHB1RSTR &= ~RCC_AHB1RSTR_DMA2DRST;

for (i = 0; i < ngrains; i++) {
  offsets[i] = (int32_t *)&array[0];
  prev_window[i] = 0;
}
phase = 0;

// enable SDRam read burst
// FMC_Bank5_6->SDCR[0] |= FMC_SDCR1_RBURST;
save_FMC_SDCR1 = *((uint32_t *)0xA0000140);
*((uint32_t *)0xA0000140) |= 0x00001000;

Control Rate

static int32_t dmabuf1[8] __attribute__((section(".sram2")));
static int32_t dmabuf2[8] __attribute__((section(".sram2")));

int i;
writepos = (writepos + BUFSIZE) & LENGTHMASK;
int16_t *parray = &array[writepos];
for (i = 0; i < BUFSIZE; i++) {
  *parray++ = __SSAT(inlet_in[i] >> 14, 16);
}
parray = &array[writepos] + LENGTH;
for (i = 0; i < BUFSIZE; i++) {
  *parray++ = __SSAT(inlet_in[i] >> 14, 16);
}

int32_t delay = __USAT(param_delay + inlet_delay, 27) >> (28 - LENGTHPOW);
int32_t spread = __USAT(param_spread + inlet_spread, 27) << 4;

/*
// unfinished
// search for next positive zerocrossing
        int16_t *psample = &attr_table.array[off + 128 * BUFSIZE];
        int j = 512;
        while(j--){
                if (*psample++ < 0) break;
        }
        j = 512;
        while(j--){
                if (*psample++ >= 0) break;
        }
        // align to middle of window
        *psample -= 64 * BUFSIZE;
        *psample -= index * 16;
        offsets[(127-index)&0x7F] = (int32_t *)((int32_t)psample & 0xFFFFFFFC);
*/

/*
example:
        ngrains=16
        grainlength=8*BUFSIZE
        iNewGrainsPerFrame=2

  frame:0 1 2 3 4 5 6 7 8 9 A B C D E F
grain:0 / - - - - - - \ / - - - - - - \
         1 \ / - - - - - - \ / - - - - - -
         2 - \ / - - - - - - \ / - - - - -
         3 - - \ / - - - - - - \ / - - - -
         4 - - - \ / - - - - - - \ / - - -
         5 - - - - \ / - - - - - - \ / - -
         6 - - - - - \ / - - - - - - \ / -
         7 - - - - - - \ / - - - - - - \ /
         8 / - - - - - - \ / - - - - - - \
         9 \ / - - - - - - \ / - - - - - -
         A - \ / - - - - - - \ / - - - - -
         B - - \ / - - - - - - \ / - - - -
         C - - - \ / - - - - - - \ / - - -
         D - - - - \ / - - - - - - \ / - -
         E - - - - - \ / - - - - - - \ / -
         F - - - - - - \ / - - - - - - \ /
even grains on left output
odd grains on right output
*/

if (iNewGrainsPerFrame) {
  int index =
      ((uint32_t)phase) / ((1ull << 32) / (ngrains / iNewGrainsPerFrame1));
  for (i = 0; i < iNewGrainsPerFrame; i++) {
    index += ngrains / iNewGrainsPerFrame1;
    offsets[(0 - index) & (ngrains - 1)] = getNewOffset(delay, spread);
  }
} else {
  if (!(phase & (incr1 - 1))) {
    int index = ((uint32_t)phase) / ((1ull << 32) / ngrains);
    offsets[(0 - index) & (ngrains - 1)] = getNewOffset(delay, spread);
  }
}

// every 32bit word contains 2 16bit samples
setupDMACopy(BUFSIZE / 2);

// clear output buffers
for (i = 0; i < BUFSIZE; i++) {
  outlet_l[i] = 0;
  outlet_r[i] = 0;
}

int iGrain;
int phl = phase;
int32_t **pGrain;
pGrain = &offsets[0];
int32_t *pPrevWindow = &prev_window[0];
int32_t a1, a2;

#if 1 // use DMA
// loop lead-in
waitDMACopyComplete();
initiateDMACopy(&dmabuf2[0], *pGrain);
*pGrain += BUFSIZE / 2;
pGrain++;
// loop
for (iGrain = 1; iGrain < (ngrains - 1); iGrain++) {
  // pingpong SDRAM to SRAM using DMA
  waitDMACopyComplete();
  initiateDMACopy(&dmabuf1[0], *pGrain);

  *pGrain += BUFSIZE / 2;
  pGrain++;
  a1 = *pPrevWindow;
  a2 = window(phl + incr2) >> attenuate_shift;
  *pPrevWindow++ = a2;

  //	if (iGrain==1+(param_p1>>20))
  simd16_env(&outlet_l[0], a2, a1, &dmabuf2[0]);

  phl += incr1;

  iGrain++;
  waitDMACopyComplete();
  initiateDMACopy(&dmabuf2[0], *pGrain);
  *pGrain += BUFSIZE / 2;
  pGrain++;

  a1 = *pPrevWindow;
  a2 = window(phl + incr2) >> attenuate_shift;
  *pPrevWindow++ = a2;

  //	if (iGrain==1+(param_p1>>20))
  simd16_env(&outlet_r[0], a2, a1, &dmabuf1[0]);

  phl += incr1;
}
// loop lead-out
waitDMACopyComplete();
initiateDMACopy(&dmabuf1[0], *pGrain);
*pGrain += BUFSIZE / 2;
pGrain++;
a1 = *pPrevWindow;
a2 = window(phl + incr2) >> attenuate_shift;
*pPrevWindow++ = a2;
simd16_env(&outlet_l[0], a2, a1, &dmabuf2[0]);
phl += incr1;
a1 = *pPrevWindow;
a2 = window(phl + incr2) >> attenuate_shift;
*pPrevWindow++ = a2;
waitDMACopyComplete();
simd16_env(&outlet_r[0], a2, a1, &dmabuf1[0]);

#else // implementation without DMA copy
for (iGrain = 0; iGrain < ngrains; iGrain++) {
  a1 = *pPrevWindow;
  a2 = window(phl + incr2) >> attenuate_shift;
  *pPrevWindow++ = a2;
  simd16_env(&outlet_l[0], a2, a1, *pGrain);
  *pGrain += BUFSIZE / 2;
  pGrain++;
  phl += incr1;
  iGrain++;
  a1 = *pPrevWindow;
  a2 = window(phl + incr2) >> attenuate_shift;
  *pPrevWindow++ = a2;
  simd16_env(&outlet_r[0], a2, a1, *pGrain);
  *pGrain += BUFSIZE / 2;
  pGrain++;
  phl += incr1;
}
#endif

phase += incr2;

Dispose

*((uint32_t *)0xA0000140) = save_FMC_SDCR1;