I am struggling to get acceptable sound quality from an ESP32 over Bluetooth when two or more voices are played; the output sounds very distorted. I made an Android app that sends MIDI messages to the ESP32, which plays them. The app works fine. I'm not using a DAC, just the onboard Bluetooth and pin 25 with a capacitor and a 2 W, 64-ohm speaker.
// Audio sample rate in Hz; also used as the update-rate template argument of the oscillators.
// NOTE(review): defined ahead of the Mozzi include, presumably to override the library's
// default rate — confirm that the installed Mozzi version actually honours this define.
#define AUDIO_RATE 44100
// Output pin for audio channel 1.
#define MOZZI_AUDIO_CHANNEL_1_PIN 25 // DAC1 (GPIO 25)
#include <MozziGuts.h>
#include <Oscil.h>
#include <ADSR.h>
#include <mozzi_midi.h>
#include <tables/sin2048_int8.h>
#include <BluetoothSerial.h>
// Rate passed to startMozzi() and used as the envelopes' update-rate template argument.
#define CONTROL_RATE 128 // Smoother control
// Size of the oscillator, envelope and voice arrays, i.e. the maximum number of simultaneous notes.
#define MAX_VOICES 16 // Starting point, can increase
// Envelope segment times handed to the envelopes' setTimes(); presumably milliseconds — TODO confirm.
#define ATTACK 50
#define DECAY 50
#define SUSTAIN 60000
#define RELEASE 200
// Initial attack/decay levels handed to setADLevels() in setup(); playNote() replaces them per note.
#define ATTACK_LEVEL 255
#define DECAY_LEVEL 255
// Bluetooth serial link over which the phone app sends note commands.
BluetoothSerial SerialBT;

// One oscillator per voice; next() is called once per audio sample in updateAudio().
Oscil<2048, AUDIO_RATE> oscillators[MAX_VOICES];

// One envelope per voice. update() is called from updateControl() (CONTROL_RATE) while
// next() is called from updateAudio(), so the second template argument — the rate at
// which next() interpolates between control updates — has to be AUDIO_RATE. Declaring it
// as CONTROL_RATE makes every phase finish after a handful of samples and then overshoot
// its target level, which is heard as harsh amplitude distortion.
ADSR<CONTROL_RATE, AUDIO_RATE> envelopes[MAX_VOICES];
struct Voice {
byte note;
byte velocity;
bool active;
};
Voice voices[MAX_VOICES];
int attack = ATTACK, decay = DECAY, sustain = SUSTAIN, release = RELEASE;
void setup() {
Serial.begin(115200);
delay(100);
Serial.println("Starting...");
SerialBT.begin("ESP32Synth");
Serial.println("Bluetooth started. Pair with 'ESP32Synth'");
for (int i = 0; i < MAX_VOICES; i++) {
oscillators[i].setTable(SIN2048_DATA);
envelopes[i].setADLevels(ATTACK_LEVEL, DECAY_LEVEL);
envelopes[i].setTimes(attack, decay, sustain, release);
voices[i].note = 0;
voices[i].velocity = 0;
voices[i].active = false;
}
startMozzi(CONTROL_RATE);
Serial.println("Mozzi started");
}
// Mozzi control-rate callback: drains pending Bluetooth bytes into newline-terminated
// commands, dispatches each complete command, then advances the envelopes of all
// active voices.
void updateControl() {
  // Upper bound on a buffered command. The longest command the parser understands
  // ("DOWN:127,127") is far shorter, so anything longer is treated as garbage. Without
  // a cap, a stream that never sends '\n' would grow the String until the heap runs out.
  static const unsigned int MAX_COMMAND_LENGTH = 32;
  static String command = "";
  static bool discardingLine = false;  // true while skipping the rest of an over-long line

  while (SerialBT.available()) {
    const char receivedChar = SerialBT.read();
    if (receivedChar == '\n') {
      if (!discardingLine) {
        parseBluetoothMIDI(command);
      }
      command = "";
      discardingLine = false;
    } else if (discardingLine) {
      // Still inside an over-long line: drop bytes until the next newline.
    } else if (command.length() >= MAX_COMMAND_LENGTH) {
      command = "";
      discardingLine = true;
    } else {
      command += receivedChar;
    }
  }

  for (int i = 0; i < MAX_VOICES; i++) {
    if (voices[i].active) envelopes[i].update();
  }
}
// Parses one text command received over Bluetooth and triggers the matching action.
//
// Accepted forms (as handled below):
//   DOWN:<note>             start <note> with velocity 127
//   DOWN:<note>,<velocity>  start <note> with the given velocity
//   UP:<note>               stop <note>
//
// Commands without ':' or with an unknown action are ignored. Commands whose note is
// not a number in 0..127 are also ignored, instead of being truncated to a byte and
// played as some unrelated note. Velocity is clamped to 0..127.
void parseBluetoothMIDI(String command) {
  const int separatorIndex = command.indexOf(':');
  if (separatorIndex == -1) return;

  const String action = command.substring(0, separatorIndex);
  const int commaIndex = command.indexOf(',', separatorIndex + 1);

  // The note is the text between ':' and the optional ','.
  String noteText = (commaIndex == -1)
                        ? command.substring(separatorIndex + 1)
                        : command.substring(separatorIndex + 1, commaIndex);
  noteText.trim();
  // toInt() returns 0 for non-numeric text, which would otherwise play note 0.
  if (noteText.length() == 0 || !isDigit(noteText[0])) return;
  const long note = noteText.toInt();
  if (note < 0 || note > 127) return;

  if (action == "DOWN") {
    long velocity = 127;
    if (commaIndex != -1) {
      velocity = constrain(command.substring(commaIndex + 1).toInt(), 0L, 127L);
    }
    playNote((byte)note, (byte)velocity);
  } else if (action == "UP") {
    stopNote((byte)note);
  }
}
int findVoiceToAllocate(byte note) {
for (int i = 0; i < MAX_VOICES; i++) {
if (voices[i].active && voices[i].note == note) return i;
}
for (int i = 0; i < MAX_VOICES; i++) {
if (!voices[i].active) return i;
}
unsigned long oldestTime = millis();
int oldestVoice = 0;
for (int i = 0; i < MAX_VOICES; i++) {
if (voices[i].active && voices[i].note != 0) {
unsigned long timeDiff = millis() - voices[i].note;
if (timeDiff < oldestTime) {
oldestTime = timeDiff;
oldestVoice = i;
}
}
}
return oldestVoice;
}
void playNote(byte note, byte velocity) {
if (velocity == 0) {
stopNote(note);
return;
}
int voiceIndex = findVoiceToAllocate(note);
// Random phase to avoid phase cancellation
oscillators[voiceIndex].setPhase(random(0, 2048));
oscillators[voiceIndex].setFreq(mtof(note));
voices[voiceIndex].note = note;
voices[voiceIndex].velocity = velocity;
voices[voiceIndex].active = true;
// Scale velocity properly to avoid overflow
int scaledVelocity = map(velocity, 1, 127, 10, 255);
envelopes[voiceIndex].setADLevels(scaledVelocity, scaledVelocity);
envelopes[voiceIndex].setTimes(attack, decay, sustain, release);
envelopes[voiceIndex].noteOn();
}
void stopNote(byte note) {
for (int i = 0; i < MAX_VOICES; i++) {
if (voices[i].active && voices[i].note == note) {
envelopes[i].noteOff();
voices[i].active = false;
voices[i].note = 0;
}
}
}
int updateAudio() {
int32_t output = 0;
int activeVoices = 0;
// Calculate the maximum envelope value for proper scaling
int32_t maxEnvValue = 0;
for (int i = 0; i < MAX_VOICES; i++) {
if (voices[i].active && envelopes[i].playing()) {
int envVal = envelopes[i].next();
if (envVal > maxEnvValue) maxEnvValue = envVal;
activeVoices++;
}
}
if (activeVoices == 0) return 128;
// Mix all active voices with proper scaling
for (int i = 0; i < MAX_VOICES; i++) {
if (voices[i].active && envelopes[i].playing()) {
// Get the current sample from the oscillator
int8_t oscSample = oscillators[i].next();
// Get the current envelope value
int envVal = envelopes[i].next();
// Scale the amplitude by the envelope and velocity
// Using fixed point math to maintain precision
int32_t scaledSample = ((int32_t)oscSample * envVal * voices[i].velocity) >> 15;
// Add to the output
output += scaledSample;
}
}
if (output > 64) {
output = 64 + (output - 64) / 2;
}
if (output < -64) {
output = -64 + (output + 64) / 2;
}
// Final scaling and centering for 8-bit DAC output
output = constrain(output, -128, 127);
return output + 128;
}
// Arduino main loop: does nothing except call Mozzi's audioHook() on every iteration.
void loop() {
audioHook();
}