Mozzi polyphony.

MulV8

unread,

Mar 5, 2025, 4:50:25 PMMar 5

to Mozzi-users

I am struggling to get acceptable sound quality from an ESP32 over Bluetooth when 2 or more voices are played; it sounds very distorted. I made an Android app that sends MIDI messages to the ESP32 for playback, and the app works fine. I'm not using an external DAC, just the onboard Bluetooth and pin 25 with a capacitor and a 2 W, 64-ohm speaker.

thanks,

Luke

#define AUDIO_RATE 44100
#define MOZZI_AUDIO_CHANNEL_1_PIN 25 // DAC1 (GPIO 25)
#include <MozziGuts.h>
#include <Oscil.h>
#include <ADSR.h>
#include <mozzi_midi.h>
#include <tables/sin2048_int8.h>
#include <BluetoothSerial.h>

#define CONTROL_RATE 128 // Smoother control
#define MAX_VOICES 16 // Starting point, can increase

#define ATTACK 50
#define DECAY 50
#define SUSTAIN 60000
#define RELEASE 200
#define ATTACK_LEVEL 255
#define DECAY_LEVEL 255

BluetoothSerial SerialBT;

// One sine-table oscillator per polyphony slot, stepped once per audio sample.
Oscil<2048, AUDIO_RATE> oscillators[MAX_VOICES];
// One amplitude envelope per polyphony slot.
// First template argument: rate at which update() is called (updateControl()).
// Second template argument: rate at which next() is called. next() is called
// from updateAudio(), so it has to be AUDIO_RATE; declaring CONTROL_RATE here
// makes the envelope interpolate as if next() ran once per control tick, so
// its value runs past each target between control updates.
ADSR<CONTROL_RATE, AUDIO_RATE> envelopes[MAX_VOICES];

// Bookkeeping for one polyphony slot. Slot i is used together with
// oscillators[i] and envelopes[i].
struct Voice {
  byte note;      // note number assigned to this slot; reset to 0 when the note is stopped
  byte velocity;  // velocity the note was started with
  bool active;    // true from playNote() until stopNote() releases the slot
};

Voice voices[MAX_VOICES];

int attack = ATTACK, decay = DECAY, sustain = SUSTAIN, release = RELEASE;

void setup() {
  Serial.begin(115200);
  delay(100);
  Serial.println("Starting...");

  SerialBT.begin("ESP32Synth");
  Serial.println("Bluetooth started. Pair with 'ESP32Synth'");

  for (int i = 0; i < MAX_VOICES; i++) {
    oscillators[i].setTable(SIN2048_DATA);
    envelopes[i].setADLevels(ATTACK_LEVEL, DECAY_LEVEL);
    envelopes[i].setTimes(attack, decay, sustain, release);
    voices[i].note = 0;
    voices[i].velocity = 0;
    voices[i].active = false;
  }

  startMozzi(CONTROL_RATE);
  Serial.println("Mozzi started");
}

void updateControl() {
  static String command = "";
  while (SerialBT.available()) {
    char receivedChar = SerialBT.read();
    if (receivedChar == '\n') {
      parseBluetoothMIDI(command);
      command = "";
    } else {
      command += receivedChar;
    }
  }

  for (int i = 0; i < MAX_VOICES; i++) {
    if (voices[i].active) envelopes[i].update();
  }
}

// Parses one text command received over Bluetooth and acts on it.
//
// Accepted forms:
//   "DOWN:<note>"            start <note> at velocity 127
//   "DOWN:<note>,<velocity>" start <note> at the given velocity
//   "UP:<note>"              stop <note>
//
// Lines without a ':' and unknown actions are ignored. Notes outside 0..127
// are ignored and velocity is limited to 0..127, so that malformed input is
// not silently truncated to a different note when narrowed to byte.
void parseBluetoothMIDI(String command) {
  const int separatorIndex = command.indexOf(':');
  if (separatorIndex == -1) return;

  const String action = command.substring(0, separatorIndex);

  // toInt() stops at the first non-digit, so this yields the note for both
  // "<note>" and "<note>,<velocity>".
  const long note = command.substring(separatorIndex + 1).toInt();
  if (note < 0 || note > 127) return;

  if (action == "DOWN") {
    long velocity = 127;
    const int commaIndex = command.indexOf(',', separatorIndex + 1);
    if (commaIndex != -1) {
      velocity = command.substring(commaIndex + 1).toInt();
      if (velocity < 0) velocity = 0;
      if (velocity > 127) velocity = 127;
    }
    playNote(static_cast<byte>(note), static_cast<byte>(velocity));
  } else if (action == "UP") {
    stopNote(static_cast<byte>(note));
  }
}

// Chooses the voice slot that should play `note`.
//
// Preference order:
//   1. a slot already playing the same note (retrigger in place),
//   2. any inactive slot,
//   3. otherwise the slot that was handed out longest ago (voice stealing).
//
// The previous stealing logic subtracted the stored note number from
// millis(), i.e. it treated a pitch as a timestamp. Allocation order is now
// tracked with a private sequence counter instead.
//
// Returns an index in [0, MAX_VOICES). The returned slot is stamped as the
// most recently allocated one, on the assumption that the caller starts a
// note on it.
int findVoiceToAllocate(byte note) {
  static unsigned long allocationStamp[MAX_VOICES] = {0};
  static unsigned long allocationCounter = 0;

  int chosen = -1;

  for (int i = 0; i < MAX_VOICES && chosen < 0; i++) {
    if (voices[i].active && voices[i].note == note) chosen = i;
  }

  for (int i = 0; i < MAX_VOICES && chosen < 0; i++) {
    if (!voices[i].active) chosen = i;
  }

  if (chosen < 0) {
    chosen = 0;
    for (int i = 1; i < MAX_VOICES; i++) {
      // Unsigned subtraction keeps the age comparison valid even if the
      // counter wraps around.
      const unsigned long age = allocationCounter - allocationStamp[i];
      const unsigned long oldestAge = allocationCounter - allocationStamp[chosen];
      if (age > oldestAge) chosen = i;
    }
  }

  allocationStamp[chosen] = ++allocationCounter;
  return chosen;
}

// Starts `note` on a voice slot chosen by findVoiceToAllocate().
//
// note:     MIDI note number; values above 127 are ignored.
// velocity: 0 is treated as note-off (delegates to stopNote()); values above
//           127 are limited to 127 so the envelope level computed by map()
//           stays within 10..255 instead of wrapping when narrowed to a byte.
void playNote(byte note, byte velocity) {
  if (velocity == 0) {
    stopNote(note);
    return;
  }
  if (note > 127) return;
  if (velocity > 127) velocity = 127;

  const int voiceIndex = findVoiceToAllocate(note);

  // Random start position in the 2048-cell table so that simultaneous notes
  // do not all begin in phase.
  oscillators[voiceIndex].setPhase(random(0, 2048));
  // Use the float overload of mtof() so the pitch keeps its fractional Hz;
  // the integer overloads return whole-Hz values, which detunes intervals.
  oscillators[voiceIndex].setFreq(mtof(static_cast<float>(note)));

  voices[voiceIndex].note = note;
  voices[voiceIndex].velocity = velocity;
  voices[voiceIndex].active = true;

  // Velocity 1..127 sets both the attack and decay levels of the envelope.
  const int scaledVelocity = map(velocity, 1, 127, 10, 255);
  envelopes[voiceIndex].setADLevels(scaledVelocity, scaledVelocity);
  envelopes[voiceIndex].setTimes(attack, decay, sustain, release);
  envelopes[voiceIndex].noteOn();
}

// Releases every active voice slot that is currently playing `note`.
// The envelope gets noteOff() and the slot is marked free (and its note
// cleared) immediately, not after the release phase has finished.
void stopNote(byte note) {
  for (int slot = 0; slot < MAX_VOICES; ++slot) {
    auto &voice = voices[slot];
    if (!voice.active || voice.note != note) continue;

    envelopes[slot].noteOff();
    voice.active = false;
    voice.note = 0;
  }
}

int updateAudio() {
  int32_t output = 0;
  int activeVoices = 0;
  
  // Calculate the maximum envelope value for proper scaling
  int32_t maxEnvValue = 0;
  for (int i = 0; i < MAX_VOICES; i++) {
    if (voices[i].active && envelopes[i].playing()) {
      int envVal = envelopes[i].next();
      if (envVal > maxEnvValue) maxEnvValue = envVal;
      activeVoices++;
    }
  }
  

  if (activeVoices == 0) return 128;
  
  // Mix all active voices with proper scaling
  for (int i = 0; i < MAX_VOICES; i++) {
    if (voices[i].active && envelopes[i].playing()) {
      // Get the current sample from the oscillator
      int8_t oscSample = oscillators[i].next();
      
      // Get the current envelope value
      int envVal = envelopes[i].next();
      
      // Scale the amplitude by the envelope and velocity
      // Using fixed point math to maintain precision
      int32_t scaledSample = ((int32_t)oscSample * envVal * voices[i].velocity) >> 15;
      
      // Add to the output
      output += scaledSample;
    }
  }
  
  if (output > 64) {
    output = 64 + (output - 64) / 2;
  }
  if (output < -64) {
    output = -64 + (output + 64) / 2;
  }
  
  // Final scaling and centering for 8-bit DAC output
  output = constrain(output, -128, 127);
  return output + 128;
}

// Arduino main loop: does nothing except call Mozzi's audioHook().
void loop() {
  audioHook();
}

Mozzi-users

unread,

Mar 5, 2025, 7:53:49 PMMar 5

to Mozzi-users

Hi,

one quick observation/question -

do you really want to add 128 to the output at the end of updateAudio()?

Mozzi expects the return value to be 0-centred.

Also, check out the examples as documentation as Mozzi now has functions which take care of scaling outputs to match the hardware being used.

For example, the Basic>Sinewave example has this at the end of updateAudio():

return MonoOutput::from8Bit(aSin.next()); // return an int signal centred around 0

And this page might be useful:

https://sensorium.github.io/Mozzi/doc/html/struct_mono_output.html#a8a0bf7d9c4446b83f17ccd75433a828f

MulV8

unread,

Mar 6, 2025, 1:49:31 PMMar 6

to mozzi...@googlegroups.com

It is not necessary. I'll try it without it. I saw that in an ESP32 example, I think, and assumed it was needed.

--
You received this message because you are subscribed to the Google Groups "Mozzi-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to mozzi-users...@googlegroups.com.
To view this discussion, visit https://groups.google.com/d/msgid/mozzi-users/77616638-5bdb-4dcb-9ba3-f29bc8039e21n%40googlegroups.com.

MulV8

unread,

Mar 7, 2025, 12:38:07 PMMar 7

to Mozzi-users

It still sounds distorted, but better. Even a "Twinkle Twinkle" MIDI file is so distorted you can't really recognise it.

  oscillators[voiceIndex].setFreq(mtof(note));
  
  voices[voiceIndex].note = note;
  voices[voiceIndex].velocity = velocity;
  voices[voiceIndex].active = true;
  
  // Scale velocity properly to avoid overflow
  int scaledVelocity = map(velocity, 1, 127, 10, 255);
  envelopes[voiceIndex].setADLevels(scaledVelocity, scaledVelocity);
  envelopes[voiceIndex].setTimes(attack, decay, sustain, release);
  envelopes[voiceIndex].noteOn();
}

// Releases every active voice slot that is currently playing `note`:
// triggers the envelope's note-off, then frees the slot and clears its note.
void stopNote(byte note) {
  for (int slot = 0; slot < MAX_VOICES; ++slot) {
    auto &voice = voices[slot];
    const bool matches = voice.active && voice.note == note;
    if (!matches) continue;

    envelopes[slot].noteOff();
    voice.active = false;
    voice.note = 0;
  }
}

int updateAudio() {

    int32_t output = 0; // Use a 32-bit integer to avoid overflow during mixing
    int activeVoices = 0; // Count the number of active voices

    // Mix all active voices

    for (int i = 0; i < MAX_VOICES; i++) {
        if (voices[i].active && envelopes[i].playing()) {

            // Get the current oscillator sample and envelope value
            int8_t oscSample = oscillators[i].next(); // Oscillator output (-128 to 127)
            int envVal = envelopes[i].next();         // Envelope output (0 to 255)

            // Scale the oscillator sample by the envelope and velocity
            int32_t contribution = (oscSample * envVal * voices[i].velocity) >> 8;

            // Add the contribution to the final output
            output += contribution;

            // Increment the active voice count
            activeVoices++;
        }
    }

    // If no voices are active, return silence
    if (activeVoices == 0) {
        return 0;
    }

    // Scale the output to the appropriate range for the DAC
    // Use MonoOutput::fromAlmostNBit() to handle scaling and clipping
    return MonoOutput::fromAlmostNBit(10, output).clip();
}

// Arduino main loop: does nothing except call Mozzi's audioHook().
void loop() {
  audioHook();
}

Mozzi-users

unread,

Mar 7, 2025, 7:47:13 PMMar 7

to Mozzi-users

Another quick observation or 2 regarding MAX_VOICES set to 16 in your sketch - I haven't tested how many oscillators with envelopes can be played with an esp32, so I'm not sure if the cpu can keep up. That's one thing to explore, maybe start by getting just 1 voice to work properly and working up from there.

Also, assuming each voice is 8 bits before being summed, 16 voices × 8 bits comes to 12 bits (summing 16 voices adds 4 bits), so your output might still be distorting since you're allowing only almost 10 bits of depth:
return MonoOutput::fromAlmostNBit(10, output).clip();

This could work better:

return MonoOutput::fromNBit(12, output);

(Note - I haven't looked really closely through every line of code, so I might be wrong about the bit depth of each voice in your sketch).

Another problem could be that when you're only playing one 8 bit voice, it will get divided by 16 (MAX_VOICES), scaled down by 4 bits, to be only a 4 bit waveform output, which will sound pretty distorted. (Is that why you're using 10 as the bit depth in your output line?).

And, does the bluetooth midi code block the processor waiting for messages? Something worth checking, in case you need to use non-blocking code to use bluetooth.

A good start is to get a minimal version working, and add parts step-by-step from there. I'd try to get one voice, then a small number of voices working first, without bluetooth, using set values so you don't need midi, and build in the midi when you're sure the audio is solid....

tomco...@live.fr

unread,

Mar 8, 2025, 4:44:38 AMMar 8

to Mozzi-users

Hei,

To add on the previous comment, and similarly without having done any tests:

- int32_t contribution = (oscSample * envVal * voices[i].velocity) >> 8; ends up being 16 bits if I am correct (8+8+8-8) which is quite more than the 10 you are telling Mozzi that you are sending: return MonoOutput::fromAlmostNBit(10, output).clip(); a good test is to increase this, say to 16 and see if the sound gets better, indicating that you are sending more bits than Mozzi expects.

- MonoOutput::fromAlmostNBit is not very used for ESP32, this was mostly coded for AVR which can output nearly 9 bits (but not exactly), I would suggest changing to fromNBit

- #define AUDIO_RATE 44100 is not a tested value. Might also pose some problems, I would suggest commenting that line (and using Mozzi's default of 32768)

- maybe it would be worth to change millis() to Mozzi's timing functions (https://sensorium.github.io/Mozzi/doc/html/group__core.html#gaaa6a42d80c5297407a45ca8bf3c1c7fe) not sure how important it is on the ESP

Hope this helps!

MulV8

unread,

Mar 8, 2025, 5:51:39 AMMar 8

to Mozzi-users

16 does sound better, so it is probably a scaling issue. I will have to experiment further.

MulV8

unread,

Mar 8, 2025, 8:17:08 AMMar 8

to Mozzi-users

I wonder whether using an external DAC would produce better results. https://www.instructables.com/How-to-Make-and-Test-a-Better-DAC-With-ESP32/

MulV8

unread,

Mar 10, 2025, 3:55:43 PMMar 10

to Mozzi-users

I was playing around with it, and this code sounds a lot better. I will leave it here for future reference for anyone else.

#include <BluetoothSerial.h>

#include <MozziGuts.h>
#include <Oscil.h>
#include <ADSR.h>

#include <tables/saw2048_int8.h>
#include <tables/sin2048_int8.h>
#include <mozzi_midi.h>
#include <LowPassFilter.h>

#if !defined(CONFIG_BT_ENABLED) || !defined(CONFIG_BLUEDROID_ENABLED)
  #error Bluetooth is not enabled! Please run `make menuconfig` to enable it
#endif

BluetoothSerial SerialBT;

// Synth configuration - reduced voice count for ESP32
const int NUM_VOICES = 4; // Start with fewer voices to ensure stability

// Oscillators and envelopes
Oscil<SAW2048_NUM_CELLS, AUDIO_RATE> oscillators[NUM_VOICES];
Oscil<SIN2048_NUM_CELLS, AUDIO_RATE> subOscillators[NUM_VOICES]; // Sub oscillator for richer tone
ADSR<CONTROL_RATE, AUDIO_RATE> envelopes[NUM_VOICES];
LowPassFilter filters[NUM_VOICES]; // Optional: Add a filter per voice

// Voice structure to track active notes
// Bookkeeping for one polyphony slot. Slot i is used together with
// oscillators[i], subOscillators[i], envelopes[i] and filters[i].
struct Voice {
  byte note;                // note number stored by playNote() (after transpose is added)
  bool active;              // true while the slot is allocated to a note
  unsigned long startTime;  // mozziMicros() value captured when the note started
  bool releasing;           // true after noteOff() until the envelope stops playing
  byte velocity;            // multiplied into the voice's level in updateAudio()
};

Voice voices[NUM_VOICES];

// Synth parameters
int octaveShift = 0;
int transpose = 0;
int attackTime = 50;    // in ms
int decayTime = 100;    // in ms
int sustainLevel = 180; // 0-255
int releaseTime = 300;  // in ms
int filterCutoff = 255; // 0-255
int detune = 0;         // 0-255, affects sub oscillator

// Function prototypes
void playNote(byte note);
void stopNote(byte note);
void panicAllNotes();
void processCommand(String command);
void updateSynthParameters();

void setup() {
  Serial.begin(115200);
  SerialBT.begin("ESP32Synth"); // Bluetooth device name
  Serial.println("ESP32 MIDI Synth started. Pair with device name: ESP32_MIDI_Synth");

  // Initialize all voices
  for (int i = 0; i < NUM_VOICES; i++) {
    // Setup oscillators
    oscillators[i].setTable(SAW2048_DATA);
    subOscillators[i].setTable(SIN2048_DATA);

    // Setup envelope
    envelopes[i].setADLevels(255, sustainLevel);
    envelopes[i].setTimes(attackTime, decayTime, 65000, releaseTime);

    // Setup filter (if used)
    filters[i].setResonance(200);
    filters[i].setCutoffFreq(filterCutoff);

    // Initialize voice state

    voices[i].note = 0;

    voices[i].active = false;

    voices[i].startTime = 0;
    voices[i].releasing = false;
    voices[i].velocity = 127; // Default to max velocity
  }

  startMozzi(CONTROL_RATE);
  Serial.println("Mozzi started. Ready for MIDI commands.");
}

void playNote(byte note) {
  note += transpose;
  float baseFreq = mtof(note + 12 * octaveShift);

  // Find inactive voice or steal oldest
  int voiceIndex = -1;
  unsigned long oldestStartTime = mozziMicros();

  for (int i = 0; i < NUM_VOICES; i++) {
    if (!voices[i].active) {
      voiceIndex = i;
      break;
    }
    if (voices[i].startTime < oldestStartTime) {
      oldestStartTime = voices[i].startTime;
      voiceIndex = i;
    }
  }

  if (voiceIndex >= 0) {

    voices[voiceIndex].note = note;

    voices[voiceIndex].active = true;
    voices[voiceIndex].releasing = false;
    voices[voiceIndex].startTime = mozziMicros();
    voices[voiceIndex].velocity = 127;

    // Set oscillator frequencies
    oscillators[voiceIndex].setFreq(baseFreq);

    // Detune the sub oscillator
    float detuneAmount = map(detune, 0, 255, 0, 100) * 0.01f;
    float subFreq = baseFreq / 2.0f * (1.0f - detuneAmount);
    subOscillators[voiceIndex].setFreq(subFreq);

    // Reset and trigger envelope
    envelopes[voiceIndex].setADLevels(255, sustainLevel);
    envelopes[voiceIndex].setTimes(attackTime, decayTime, 65000, releaseTime);
    envelopes[voiceIndex].noteOn();

    Serial.print("Note ON: ");
    Serial.print(note);
    Serial.print(" -> Voice: ");
    Serial.println(voiceIndex);
  }
}

// Puts the first held voice playing `note` (after transpose is applied) into
// its release phase. Voices that are already releasing are skipped, and at
// most one voice is released per call.
void stopNote(byte note) {
  note += transpose;

  for (int slot = 0; slot < NUM_VOICES; ++slot) {
    auto &voice = voices[slot];
    const bool held = voice.active && !voice.releasing;
    if (!held || voice.note != note) continue;

    voice.releasing = true;
    envelopes[slot].noteOff();

    Serial.print("Note OFF: ");
    Serial.print(note);
    Serial.print(" -> Voice: ");
    Serial.println(slot);
    return;
  }
}

void panicAllNotes() {
  Serial.println("PANIC: All notes off");
  for (int i = 0; i < NUM_VOICES; i++) {

    envelopes[i].noteOff();
    voices[i].active = false;

    voices[i].releasing = false;

  }
}

void updateControl() {
  static String command = "";

  // Process Bluetooth messages

  while (SerialBT.available()) {
    char receivedChar = SerialBT.read();

    if (receivedChar == '\n') {

      processCommand(command);

      command = "";
    } else {
      command += receivedChar;
    }
  }

  // Voice management: check if any releasing voices have completed
  for (int i = 0; i < NUM_VOICES; i++) {
    if (voices[i].active && voices[i].releasing) {
      if (!envelopes[i].playing()) {

        voices[i].active = false;

        voices[i].releasing = false;
      }
    }

    // Update envelopes independently
    if (voices[i].active || voices[i].releasing) {
      envelopes[i].update();
    }
  }
}

void processCommand(String command) {
  Serial.print("Command received: ");
  Serial.println(command);

  if (command.startsWith("DOWN:")) {
    byte note = command.substring(5).toInt();
    playNote(note);
  } else if (command.startsWith("UP:")) {
    byte note = command.substring(3).toInt();
    stopNote(note);
  } else if (command.startsWith("OCTAVE:")) {
    octaveShift = command.substring(7).toInt();
    Serial.print("Octave shift: ");
    Serial.println(octaveShift);
  } else if (command.startsWith("TRANSPOSE:")) {
    transpose = command.substring(10).toInt();
    Serial.print("Transpose: ");
    Serial.println(transpose);
  } else if (command.startsWith("ATTACK:")) {
    attackTime = map(command.substring(7).toInt(), 0, 255, 10, 1000);
    updateSynthParameters();
  } else if (command.startsWith("DECAY:")) {
    decayTime = map(command.substring(6).toInt(), 0, 255, 10, 1000);
    updateSynthParameters();
  } else if (command.startsWith("SUSTAIN:")) {
    sustainLevel = command.substring(8).toInt();
    updateSynthParameters();
  } else if (command.startsWith("RELEASE:")) {
    releaseTime = map(command.substring(8).toInt(), 0, 255, 10, 2000);
    updateSynthParameters();
  } else if (command.startsWith("FILTER:")) {
    filterCutoff = command.substring(7).toInt();
    updateSynthParameters();
  } else if (command.startsWith("DETUNE:")) {
    detune = command.substring(7).toInt();
    updateSynthParameters();
  } else if (command.startsWith("PANIC")) {
    panicAllNotes();
  }
}

void updateSynthParameters() {
  // Update all envelopes with new settings
  for (int i = 0; i < NUM_VOICES; i++) {
    envelopes[i].setADLevels(255, sustainLevel);
    envelopes[i].setTimes(attackTime, decayTime, 65000, releaseTime);
  }

  // Update filter
  for (int i = 0; i < NUM_VOICES; i++) {
    filters[i].setCutoffFreq(filterCutoff);
  }

  Serial.print("Synth params updated - A:");
  Serial.print(attackTime);
  Serial.print(" D:");
  Serial.print(decayTime);
  Serial.print(" S:");
  Serial.print(sustainLevel);
  Serial.print(" R:");
  Serial.print(releaseTime);
  Serial.print(" F:");
  Serial.print(filterCutoff);
  Serial.print(" Det:");
  Serial.println(detune);

}

int updateAudio() {
  int32_t output = 0;
  int activeVoices = 0;

  // Sum all active voices
  for (int i = 0; i < NUM_VOICES; i++) {
    if (voices[i].active || voices[i].releasing) {
      // Get envelope value

      int envVal = envelopes[i].next();

      // Get oscillator values
      int oscVal = oscillators[i].next();
      int subOscVal = subOscillators[i].next();

      // Mix oscillators (2/3 main, 1/3 sub)
      int voiceMix = ((oscVal * 2) + subOscVal) / 3;

      // Apply envelope and velocity
      int32_t contribution = ((int32_t)voiceMix * envVal * voices[i].velocity) >> 8;

      // Apply filter (if used)
      contribution = filters[i].next(contribution);

      // Add to output
      output += contribution;
      activeVoices++;
    }
  }

  // Prevent clipping by scaling based on number of active voices
  if (activeVoices > 1) {
    output = output / activeVoices;
  }

  // Use Mozzi's proper output functions
  return MonoOutput::fromNBit(14, output);
}

// Arduino main loop: does nothing except call Mozzi's audioHook().
void loop() {
  audioHook(); // Required for Mozzi to work
}

Reply all

Reply to author

Forward