Speed up 9600 demodulator.

This commit is contained in:
wb2osz 2021-11-22 21:15:17 -05:00
parent 049614d16c
commit 9b9744ba15
4 changed files with 180 additions and 128 deletions

View File

@ -1,7 +1,7 @@
//
// This file is part of Dire Wolf, an amateur radio packet TNC.
//
// Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2019 John Langner, WB2OSZ
// Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2019, 2021 John Langner, WB2OSZ
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -60,10 +60,6 @@
static struct audio_s *save_audio_config_p;
// TODO: temp experiment.
static int zerostuff = 1; // temp experiment.
// Current state of all the decoders.
@ -676,12 +672,6 @@ int demod_init (struct audio_s *pa)
strlcpy (save_audio_config_p->achan[chan].profiles, "+", sizeof(save_audio_config_p->achan[chan].profiles));
}
#ifdef TUNE_ZEROSTUFF
zerostuff = TUNE_ZEROSTUFF;
#endif
/*
* We need a minimum number of audio samples per bit time for good performance.
* Easier to check here because demod_9600_init might have an adjusted sample rate.
@ -696,26 +686,32 @@ int demod_init (struct audio_s *pa)
if (save_audio_config_p->achan[chan].upsample == 0) {
if (ratio < 5) {
if (ratio < 4) {
// example: 44100 / 9600 is 4.59
// Big improvement with x2.
// x4 seems to work the best.
// The other parameters are not as touchy.
// Might reduce on ARM if it takes too much CPU power.
// This is extreme.
// No one should be using a sample rate this low but
// amazingly a recording with 22050 rate can be decoded.
// 3 and 4 are the same. Need more tests.
save_audio_config_p->achan[chan].upsample = 4;
}
else if (ratio < 5) {
// example: 44100 / 9600 is 4.59
// 3 is slightly better than 2 or 4.
save_audio_config_p->achan[chan].upsample = 3;
}
else if (ratio < 10) {
// 48000 / 9600 is 5.00
// Need more research. Treat like above for now.
// example: 48000 / 9600 = 5
// 3 is slightly better than 2 or 4.
save_audio_config_p->achan[chan].upsample = 4;
save_audio_config_p->achan[chan].upsample = 3;
}
else if (ratio < 15) {
// ...
// ... guessing
save_audio_config_p->achan[chan].upsample = 2;
}
@ -786,7 +782,8 @@ int demod_init (struct audio_s *pa)
}
demod_9600_init (save_audio_config_p->achan[chan].modem_type,
save_audio_config_p->achan[chan].upsample * save_audio_config_p->adev[ACHAN2ADEV(chan)].samples_per_sec,
save_audio_config_p->adev[ACHAN2ADEV(chan)].samples_per_sec,
save_audio_config_p->achan[chan].upsample,
save_audio_config_p->achan[chan].baud, D);
if (strchr(save_audio_config_p->achan[chan].profiles, '+') != NULL) {
@ -924,7 +921,7 @@ __attribute__((hot))
void demod_process_sample (int chan, int subchan, int sam)
{
float fsam;
int k;
//int k;
struct demodulator_state_s *D;
@ -1016,47 +1013,7 @@ void demod_process_sample (int chan, int subchan, int sam)
case MODEM_AIS:
default:
if (zerostuff) {
/* Literature says this is better if followed */
/* by appropriate low pass filter. */
/* So far, both are same in tests with different */
/* optimal low pass filter parameters. */
for (k=1; k<save_audio_config_p->achan[chan].upsample; k++) {
demod_9600_process_sample (chan, 0, D);
}
demod_9600_process_sample (chan, sam * save_audio_config_p->achan[chan].upsample, D);
}
else {
/* Linear interpolation. */
static int prev_sam;
switch (save_audio_config_p->achan[chan].upsample) {
case 1:
demod_9600_process_sample (chan, sam, D);
break;
case 2:
demod_9600_process_sample (chan, (prev_sam + sam) / 2, D);
demod_9600_process_sample (chan, sam, D);
break;
case 3:
demod_9600_process_sample (chan, (2 * prev_sam + sam) / 3, D);
demod_9600_process_sample (chan, (prev_sam + 2 * sam) / 3, D);
demod_9600_process_sample (chan, sam, D);
break;
case 4:
demod_9600_process_sample (chan, (3 * prev_sam + sam) / 4, D);
demod_9600_process_sample (chan, (prev_sam + sam) / 2, D);
demod_9600_process_sample (chan, (prev_sam + 3 * sam) / 4, D);
demod_9600_process_sample (chan, sam, D);
break;
default:
assert (0);
break;
}
prev_sam = sam;
}
demod_9600_process_sample (chan, sam, save_audio_config_p->achan[chan].upsample, D);
break;
} /* switch modem_type */

View File

@ -1,7 +1,7 @@
//
// This file is part of Dire Wolf, an amateur radio packet TNC.
//
// Copyright (C) 2011, 2012, 2013, 2015, 2019 John Langner, WB2OSZ
// Copyright (C) 2011, 2012, 2013, 2015, 2019, 2021 John Langner, WB2OSZ
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -25,7 +25,8 @@
*
* Module: demod_9600.c
*
* Purpose: Demodulator for scrambled baseband encoding.
* Purpose: Demodulator for baseband signal.
* This is used for AX.25 (with scrambling) and IL2P (without scrambling).
*
* Input: Audio samples from either a file or the "sound card."
*
@ -45,12 +46,14 @@
#include <ctype.h>
// Fine tuning for different demodulator types.
// Don't remove this section. It is here for a reason.
#define DCD_THRESH_ON 32 // Hysteresis: Can miss 0 out of 32 for detecting lock.
// This is best for actual on-the-air signals.
// Still too many brief false matches.
#define DCD_THRESH_OFF 8 // Might want a little more fine tuning.
#define DCD_GOOD_WIDTH 1024 // No more than 1024!!!
#include "fsk_demod_state.h" // Values above override defaults.
#include "tune.h"
@ -125,9 +128,12 @@ static inline float agc (float in, float fast_attack, float slow_decay, float *p
*
* Inputs: modem_type - Determines whether scrambling is used.
*
* samples_per_sec - Number of samples per second.
* Might be upsampled in hopes of
* reducing the PLL jitter.
* samples_per_sec - Number of samples per second for audio.
*
* upsample - Factor to upsample the incoming stream.
* After a lot of experimentation, I discovered that
* it works better if the data is upsampled.
* This reduces the jitter for PLL synchronization.
*
* baud - Data rate in bits per second.
*
@ -137,10 +143,13 @@ static inline float agc (float in, float fast_attack, float slow_decay, float *p
*
*----------------------------------------------------------------*/
void demod_9600_init (enum modem_t modem_type, int samples_per_sec, int baud, struct demodulator_state_s *D)
void demod_9600_init (enum modem_t modem_type, int original_sample_rate, int upsample, int baud, struct demodulator_state_s *D)
{
float fc;
int j;
if (upsample < 1) upsample = 1;
if (upsample > 4) upsample = 4;
memset (D, 0, sizeof(struct demodulator_state_s));
D->modem_type = modem_type;
@ -155,12 +164,13 @@ void demod_9600_init (enum modem_t modem_type, int samples_per_sec, int baud, st
// case 'L': // upsample x4 with filtering.
D->lp_filter_len_bits = 1.0;
D->lp_filter_len_bits = 1.0; // -U4 = 61 4.59 samples/symbol
// Works best with odd number in some tests. Even is better in others.
//D->lp_filter_size = ((int) (0.5f * ( D->lp_filter_len_bits * (float)samples_per_sec / (float)baud ))) * 2 + 1;
//D->lp_filter_size = ((int) (0.5f * ( D->lp_filter_len_bits * (float)original_sample_rate / (float)baud ))) * 2 + 1;
D->lp_filter_size = (int) (( D->lp_filter_len_bits * (float)samples_per_sec / baud) + 0.5f);
// Just round to nearest integer.
D->lp_filter_size = (int) (( D->lp_filter_len_bits * (float)original_sample_rate / baud) + 0.5f);
D->lp_window = BP_WINDOW_COSINE;
@ -185,8 +195,11 @@ void demod_9600_init (enum modem_t modem_type, int samples_per_sec, int baud, st
dw_printf ("samples per bit = %.1f\n", (double)samples_per_sec / baud);
#endif
// PLL needs to use the upsampled rate.
D->pll_step_per_sample =
(int) round(TICKS_PER_PLL_CYCLE * (double) baud / (double)samples_per_sec);
(int) round(TICKS_PER_PLL_CYCLE * (double) baud / (double)(original_sample_rate * upsample));
#ifdef TUNE_LP_WINDOW
@ -217,13 +230,87 @@ void demod_9600_init (enum modem_t modem_type, int samples_per_sec, int baud, st
D->pll_searching_inertia = TUNE_PLL_SEARCHING;
#endif
fc = (float)baud * D->lpf_baud / (float)samples_per_sec;
// Initial filter (before scattering) is based on upsampled rate.
fc = (float)baud * D->lpf_baud / (float)(original_sample_rate * upsample);
//dw_printf ("demod_9600_init: call gen_lowpass(fc=%.2f, , size=%d, )\n", fc, D->lp_filter_size);
gen_lowpass (fc, D->lp_filter, D->lp_filter_size, D->lp_window);
gen_lowpass (fc, D->u.bb.lp_filter, D->lp_filter_size * upsample, D->lp_window);
// New in 1.7 -
// Use a polyphase filter to reduce the CPU load.
// Originally I used zero stuffing to upsample.
// Here is the general idea.
//
// Suppose the input samples are 1 2 3 4 5 6 7 8 9 ...
// Filter coefficients are a b c d e f g h i ...
//
// With original sampling rate, the filtering would involve multiplying and adding:
//
// 1a 2b 3c 4d 5e 6f ...
//
// When upsampling by 3, each of these would need to be evaluated
// for each audio sample:
//
// 1a 0b 0c 2d 0e 0f 3g 0h 0i ...
// 0a 1b 0c 0d 2e 0f 0g 3h 0i ...
// 0a 0b 1c 0d 0e 2f 0g 0h 3i ...
//
// 2/3 of the multiplies are always by a stuffed zero.
// We can do this more efficiently by removing them.
//
// 1a 2d 3g ...
// 1b 2e 3h ...
// 1c 2f 3i ...
//
// We scatter the original filter across multiple shorter filters.
// Each input sample cycles around them to produce the upsampled rate.
//
// a d g ...
// b e h ...
// c f i ...
//
// There are countless sources of information on DSP but this one is unique
// in that it is a college course that mentions APRS.
// https://www2.eecs.berkeley.edu/Courses/EE123
//
// Was the effort worthwhile? Times on an RPi 3.
//
// command: atest -B9600 ~/walkabout9600[abc]-compressed*.wav
//
// These are 3 recordings of a portable system being carried out of
// range and back in again. It is a real world test for weak signals.
//
// options num decoded seconds x realtime
// 1.6 1.7 1.6 1.7 1.6 1.7
// --- --- --- --- --- ---
// -P- 171 172 23.928 17.967 14.9 19.9
// -P+ 180 180 54.688 48.772 6.5 7.3
// -P- -F1 177 178 32.686 26.517 10.9 13.5
//
// So, it turns out that -P+ doesn't have a dramatic improvement, only
// around 4%, for drastically increased CPU requirements.
// Maybe we should turn that off by default, especially for ARM.
//
int k = 0;
for (int i = 0; i < D->lp_filter_size; i++) {
D->u.bb.lp_polyphase_1[i] = D->u.bb.lp_filter[k++];
if (upsample >= 2) {
D->u.bb.lp_polyphase_2[i] = D->u.bb.lp_filter[k++];
if (upsample >= 3) {
D->u.bb.lp_polyphase_3[i] = D->u.bb.lp_filter[k++];
if (upsample >= 4) {
D->u.bb.lp_polyphase_4[i] = D->u.bb.lp_filter[k++];
}
}
}
}
/* Version 1.2: Experiment with different slicing levels. */
// Really didn't help that much because we should have a symmetrical signal.
for (j = 0; j < MAX_SUBCHANS; j++) {
slice_point[j] = 0.02f * (j - 0.5f * (MAX_SUBCHANS-1));
@ -259,7 +346,7 @@ void demod_9600_init (enum modem_t modem_type, int samples_per_sec, int baud, st
* been distorted by going thru voice transceivers not
* intended to pass this sort of "audio" signal.
*
* Data is "scrambled" to reduce the amount of DC bias.
* For G3RUH mode, data is "scrambled" to reduce the amount of DC bias.
* The data stream must be unscrambled at the receiving end.
*
* We also have a digital phase locked loop (PLL)
@ -276,6 +363,9 @@ void demod_9600_init (enum modem_t modem_type, int samples_per_sec, int baud, st
* of the function to be called for each bit recovered
* from the demodulator. For now, it's simply hard-coded.
*
* After experimentation, I found that this works better if
* the original signal is upsampled by 2x or even 4x.
*
* References: 9600 Baud Packet Radio Modem Design
* http://www.amsat.org/amsat/articles/g3ruh/109.html
*
@ -290,63 +380,57 @@ void demod_9600_init (enum modem_t modem_type, int samples_per_sec, int baud, st
inline static void nudge_pll (int chan, int subchan, int slice, float demod_out, struct demodulator_state_s *D);
__attribute__((hot))
void demod_9600_process_sample (int chan, int sam, struct demodulator_state_s *D)
{
static void process_filtered_sample (int chan, float fsam, struct demodulator_state_s *D);
__attribute__((hot))
void demod_9600_process_sample (int chan, int sam, int upsample, struct demodulator_state_s *D)
{
float fsam;
float amp;
float demod_out;
#if DEBUG4
static FILE *demod_log_fp = NULL;
static int log_file_seq = 0; /* Part of log file name */
#endif
int subchan = 0;
int demod_data; /* Still scrambled. */
assert (chan >= 0 && chan < MAX_CHANS);
assert (subchan >= 0 && subchan < MAX_SUBCHANS);
/*
* Filters use last 'filter_size' samples.
*
* First push the older samples down.
*
* Finally, put the most recent at the beginning.
*
* Future project? Rather than shifting the samples,
* it might be faster to add another variable to keep
* track of the most recent sample and change the
* indexing in the later loops that multiply and add.
*/
/* Scale to nice number for convenience. */
/* Consistent with the AFSK demodulator, we'd like to use */
/* only half of the dynamic range to have some headroom. */
/* i.e. input range +-16k becomes +-1 here and is */
/* displayed in the heard line as audio level 100. */
fsam = sam / 16384.0;
fsam = (float)sam / 16384.0f;
#if defined(TUNE_ZEROSTUFF) && TUNE_ZEROSTUFF == 0
// experiment - no filtering.
// Low pass filter
push_sample (fsam, D->u.bb.audio_in, D->lp_filter_size);
amp = fsam;
fsam = convolve (D->u.bb.audio_in, D->u.bb.lp_polyphase_1, D->lp_filter_size);
process_filtered_sample (chan, fsam, D);
if (upsample >= 2) {
fsam = convolve (D->u.bb.audio_in, D->u.bb.lp_polyphase_2, D->lp_filter_size);
process_filtered_sample (chan, fsam, D);
if (upsample >= 3) {
fsam = convolve (D->u.bb.audio_in, D->u.bb.lp_polyphase_3, D->lp_filter_size);
process_filtered_sample (chan, fsam, D);
if (upsample >= 4) {
fsam = convolve (D->u.bb.audio_in, D->u.bb.lp_polyphase_4, D->lp_filter_size);
process_filtered_sample (chan, fsam, D);
}
}
}
}
#else
push_sample (fsam, D->raw_cb, D->lp_filter_size);
/*
* Low pass filter to reduce noise yet pass the data.
*/
__attribute__((hot))
static void process_filtered_sample (int chan, float fsam, struct demodulator_state_s *D)
{
amp = convolve (D->raw_cb, D->lp_filter, D->lp_filter_size);
#endif
int subchan = 0;
/*
* Version 1.2: Capture the post-filtering amplitude for display.
@ -359,18 +443,18 @@ void demod_9600_process_sample (int chan, int sam, struct demodulator_state_s *D
// TODO: probably no need for this. Just use D->m_peak, D->m_valley
if (amp >= D->alevel_mark_peak) {
D->alevel_mark_peak = amp * D->quick_attack + D->alevel_mark_peak * (1.0f - D->quick_attack);
if (fsam >= D->alevel_mark_peak) {
D->alevel_mark_peak = fsam * D->quick_attack + D->alevel_mark_peak * (1.0f - D->quick_attack);
}
else {
D->alevel_mark_peak = amp * D->sluggish_decay + D->alevel_mark_peak * (1.0f - D->sluggish_decay);
D->alevel_mark_peak = fsam * D->sluggish_decay + D->alevel_mark_peak * (1.0f - D->sluggish_decay);
}
if (amp <= D->alevel_space_peak) {
D->alevel_space_peak = amp * D->quick_attack + D->alevel_space_peak * (1.0f - D->quick_attack);
if (fsam <= D->alevel_space_peak) {
D->alevel_space_peak = fsam * D->quick_attack + D->alevel_space_peak * (1.0f - D->quick_attack);
}
else {
D->alevel_space_peak = amp * D->sluggish_decay + D->alevel_space_peak * (1.0f - D->sluggish_decay);
D->alevel_space_peak = fsam * D->sluggish_decay + D->alevel_space_peak * (1.0f - D->sluggish_decay);
}
/*
@ -381,12 +465,14 @@ void demod_9600_process_sample (int chan, int sam, struct demodulator_state_s *D
* This works by looking at the minimum and maximum signal peaks
* and scaling the results to be roughly in the -1.0 to +1.0 range.
*/
float demod_out;
int demod_data; /* Still scrambled. */
demod_out = agc (amp, D->agc_fast_attack, D->agc_slow_decay, &(D->m_peak), &(D->m_valley));
demod_out = agc (fsam, D->agc_fast_attack, D->agc_slow_decay, &(D->m_peak), &(D->m_valley));
// TODO: There is potential for multiple decoders with one filter.
//dw_printf ("peak=%.2f valley=%.2f amp=%.2f norm=%.2f\n", D->m_peak, D->m_valley, amp, norm);
//dw_printf ("peak=%.2f valley=%.2f fsam=%.2f norm=%.2f\n", D->m_peak, D->m_valley, fsam, norm);
if (D->num_slicers <= 1) {
@ -435,7 +521,7 @@ void demod_9600_process_sample (int chan, int sam, struct demodulator_state_s *D
fprintf (demod_log_fp, "%.3f, %.3f, %.3f, %.3f, %.3f, %d, %.2f\n",
fsam + 6,
amp + 4,
fsam + 4,
D->m_peak + 4,
D->m_valley + 4,
demod_out + 2,

View File

@ -6,9 +6,9 @@
#include "fsk_demod_state.h"
void demod_9600_init (enum modem_t modem_type, int samples_per_sec, int baud, struct demodulator_state_s *D);
void demod_9600_init (enum modem_t modem_type, int original_sample_rate, int upsample, int baud, struct demodulator_state_s *D);
void demod_9600_process_sample (int chan, int sam, struct demodulator_state_s *D);
void demod_9600_process_sample (int chan, int sam, int upsample, struct demodulator_state_s *D);

View File

@ -301,6 +301,8 @@ struct demodulator_state_s
// //
//////////////////////////////////////////////////////////////////////////////////
// TODO: Continue experiments with root raised cosine filter.
// Either switch to that or take out all the related stuff.
struct bb_only_s {
@ -314,8 +316,15 @@ struct demodulator_state_s
float audio_in[MAX_FILTER_SIZE] __attribute__((aligned(16))); // Audio samples in.
// FIXME: use lp_filter
float rrc_filter[MAX_FILTER_SIZE] __attribute__((aligned(16))); // RRC Low pass filter.
float lp_filter[MAX_FILTER_SIZE] __attribute__((aligned(16))); // Low pass filter.
// New in 1.7 - Polyphase filter to reduce CPU requirements.
float lp_polyphase_1[MAX_FILTER_SIZE] __attribute__((aligned(16)));
float lp_polyphase_2[MAX_FILTER_SIZE] __attribute__((aligned(16)));
float lp_polyphase_3[MAX_FILTER_SIZE] __attribute__((aligned(16)));
float lp_polyphase_4[MAX_FILTER_SIZE] __attribute__((aligned(16)));
float lp_1_iir_param; // very low pass filters to get DC offset.
float lp_1_out;