使用C++实现音频频谱显示 |21xrx.com

使用C++实现音频频谱显示

2023-07-06 04:19:24 深夜i 84 0

C++ 音频频谱显示实现

音频频谱显示是一种将音频信号转化为可视化图像的技术。这种技术可以帮助用户更加直观地理解音频信号的结构和特征，而且在音频编辑、混音、剪辑等领域也有着广泛的应用。

本文将介绍如何使用C++实现音频频谱显示。我们会使用一个名为FFTW的开源库来对音频信号进行傅里叶变换，然后将变换后的数据进行可视化显示。

首先，我们需要从音频输入设备（例如麦克风）采集音频，并将其表示为数字信号。这个过程可以使用一个叫做PortAudio的库完成。PortAudio是一个跨平台的音频输入/输出库，它支持多种操作系统，包括Windows、Mac OS和Linux等。需要注意的是，PortAudio本身并不解析音频文件，下面的示例录制的是默认输入设备上的声音。

从默认输入设备录制音频并生成数字信号的代码如下所示：

#include "portaudio.h"
/* Recording configuration used by the capture code below. */
#define SAMPLE_RATE 44100        /* sample rate passed to Pa_OpenStream() */
#define FRAMES_PER_BUFFER 512    /* frames per buffer passed to Pa_OpenStream() */
#define NUM_SECONDS 5            /* used to size the recording buffer */
#define NUM_CHANNELS 1           /* channel count requested from the input device */

/* Sample type stored in the recording buffer. */
typedef float SAMPLE;

/* Recording state handed to the stream callback through its userData pointer. */
typedef struct paData {
  int frameIndex;           /* index of the next frame to be written */
  int maxFrameIndex;        /* number of frames the buffer can hold */
  SAMPLE *recordedSamples;  /* storage for the recorded samples */
} paData;
/* Sample value stored when PortAudio delivers no input buffer. */
#ifndef SAMPLE_SILENCE
#define SAMPLE_SILENCE (0.0f)
#endif

/*
 * PortAudio stream callback: appends the incoming frames to the recording
 * buffer described by userData (a paData*).
 *
 * inputBuffer     - interleaved input samples, or NULL, in which case silence
 *                   is stored instead
 * framesPerBuffer - number of frames offered by this invocation
 * userData        - the paData recording state registered with Pa_OpenStream()
 *
 * Returns paContinue while the recording buffer still has room after this
 * call's frames, and paComplete once the remaining room is smaller than the
 * offered frame count (the buffer is then filled to capacity).
 * outputBuffer, timeInfo and statusFlags are not used.
 */
static int paCallback(const void *inputBuffer, void *outputBuffer, unsigned long framesPerBuffer, const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags, void *userData)
{
  paData *data = (paData*)userData;
  const SAMPLE *rptr = (const SAMPLE*)inputBuffer;
  SAMPLE *wptr = &data->recordedSamples[data->frameIndex * NUM_CHANNELS];
  unsigned long framesLeft = 0;
  unsigned long framesToCalc;
  unsigned long samplesToCalc;
  unsigned long i;
  int finished;

  (void)outputBuffer;
  (void)timeInfo;
  (void)statusFlags;

  /* Compute the remaining room without letting a negative difference wrap. */
  if (data->maxFrameIndex > data->frameIndex)
    framesLeft = (unsigned long)(data->maxFrameIndex - data->frameIndex);

  if (framesLeft < framesPerBuffer) {
    framesToCalc = framesLeft;
    finished = paComplete;
  } else {
    framesToCalc = framesPerBuffer;
    finished = paContinue;
  }

  /* Frames are interleaved, so one pass over frames * channels samples
     handles any channel count. */
  samplesToCalc = framesToCalc * NUM_CHANNELS;
  if (inputBuffer == NULL) {
    for (i = 0; i < samplesToCalc; i++)
      *wptr++ = SAMPLE_SILENCE;
  } else {
    for (i = 0; i < samplesToCalc; i++)
      *wptr++ = *rptr++;
  }

  data->frameIndex += (int)framesToCalc;
  return finished;
}
int main()
{
  PaStreamParameters inputParameters;
  PaStream *stream;
  PaError err;
  paData data;
  int numSamples = NUM_SECONDS * SAMPLE_RATE;
  int numBytes = numSamples * sizeof(SAMPLE);
  data.recordedSamples = (SAMPLE *) malloc(numBytes);
  data.maxFrameIndex = numSamples / NUM_CHANNELS;
  data.frameIndex = 0;
  err = Pa_Initialize();
  if (err != paNoError) return 1;
  inputParameters.device = Pa_GetDefaultInputDevice();
  if (inputParameters.device == paNoDevice) {
    fprintf(stderr,"Error: No default input device.\n");
    goto done;
  }
  inputParameters.channelCount = NUM_CHANNELS;
  inputParameters.sampleFormat = paFloat32;
  inputParameters.suggestedLatency = Pa_GetDeviceInfo(inputParameters.device)->defaultLowInputLatency;
  inputParameters.hostApiSpecificStreamInfo = NULL;
  err = Pa_OpenStream(
       &stream,
       &inputParameters,
       NULL,
       SAMPLE_RATE,
       FRAMES_PER_BUFFER,
       paClipOff,   
       paCallback,
       &data );
  if (err != paNoError) goto done;
  err = Pa_StartStream(stream);
  if (err != paNoError) goto done;
  printf("Waiting for data to be available...\n"); fflush(stdout);
  while(1) {
    Pa_Sleep(1000);
    if (data.frameIndex >= data.maxFrameIndex) break;
  }
  err = Pa_StopStream(stream);
  if (err != paNoError) goto done;
  err = Pa_CloseStream(stream);
  if (err != paNoError) goto done;
  Pa_Terminate();
  return 0;
done:
  fprintf(stderr, "An error occurred while using the portaudio stream\n");
  fprintf(stderr, "Error number: %d\n", err);
  fprintf(stderr, "Error message: %s\n", Pa_GetErrorText(err));
  Pa_Terminate();
  if (data.recordedSamples) free(data.recordedSamples);
  return 1;
}

采集到音频信号后，我们需要对其进行傅里叶变换，以便将时域信号转化为频域信号。这个过程可以使用FFTW库完成。FFTW是一个高效的、跨平台的傅里叶变换库，它支持多线程并行计算，并能针对运行平台选择较优的变换算法。

进行傅里叶变换的代码如下所示：

#include "fftw3.h"
/*
 * Fragment: runs a forward FFT over the recording, one block of
 * FRAMES_PER_BUFFER frames at a time, using the first channel of each frame.
 * `data` is the recording state produced by the capture code elided at the
 * "//..." markers.
 *
 * Returns 0 on success, 1 when the FFT buffers or plan cannot be created.
 */
int main()
{
  //...
  fftw_complex *in, *out;
  fftw_plan p = NULL;
  int i, j;

  in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * FRAMES_PER_BUFFER);
  out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * FRAMES_PER_BUFFER);
  if (in != NULL && out != NULL)
    p = fftw_plan_dft_1d(FRAMES_PER_BUFFER, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
  if (p == NULL) {
    if (in != NULL) fftw_free(in);
    if (out != NULL) fftw_free(out);
    return 1;
  }

  for (i = 0; i < data.maxFrameIndex; i += FRAMES_PER_BUFFER) {
    for (j = 0; j < FRAMES_PER_BUFFER; j++) {
      /* The recording length need not be a multiple of the block size, so the
         final block is zero-padded instead of reading past the buffer end. */
      if (i + j < data.maxFrameIndex)
        in[j][0] = data.recordedSamples[(i + j) * NUM_CHANNELS];
      else
        in[j][0] = 0.0;
      in[j][1] = 0.0;
    }
    fftw_execute(p);
    // ...
  }

  fftw_destroy_plan(p);
  fftw_free(in);
  fftw_free(out);
  //...
  return 0;
}

傅里叶变换完成后，我们需要对变换结果进行可视化显示。这个过程可以使用OpenGL（并借助GLUT创建窗口、处理事件循环）来完成。OpenGL是一个跨平台的图形库，它可以在多种操作系统和硬件平台上运行，包括Windows、Mac OS和Linux等。

可视化显示的代码如下所示：

#include <GL/glut.h>
/*
 * Fragment: GLUT display callback that draws the lower half of the FFT result
 * `out` (produced by code elided at the "//..." marker) as a line strip.
 * x is the bin index scaled to [0, 1); y is the bin level in dB.
 */
void display()
{
  //...
  int i;
  glBegin(GL_LINE_STRIP);
  for (i = 0; i < FRAMES_PER_BUFFER / 2; i++) {
    float x = (float)i / (FRAMES_PER_BUFFER / 2);
    /* Scale by N/2 so that a full-scale sine lands at 0 dB, which keeps the
       curve inside the [-100, 0] range set up with gluOrtho2D(). */
    double magnitude = sqrt(out[i][0] * out[i][0] + out[i][1] * out[i][1]) / (FRAMES_PER_BUFFER / 2);
    float y = -100.0f;
    /* log10(0) is -infinity; silent bins stay at the -100 dB floor instead. */
    if (magnitude > 0.0) {
      y = (float)(20.0 * log10(magnitude));
      if (y < -100.0f) y = -100.0f;
    }
    glVertex2f(x, y);
  }
  glEnd();
  glutSwapBuffers();
}
/*
 * Fragment: creates the GLUT window, registers display() as the display
 * callback and sets up a 2D projection with x in [0, 1] and y in [-100, 0].
 * Code elided by the article is marked with "//...".
 */
int main(int argc, char **argv)
{
  //...
  const int windowWidth = 640;
  const int windowHeight = 480;

  /* Window creation */
  glutInit(&argc, argv);
  glutInitWindowSize(windowWidth, windowHeight);
  glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);
  glutCreateWindow("Audio Spectrum");
  glutDisplayFunc(display);

  /* Clear colour and viewport covering the whole window */
  glClearColor(1.0, 1.0, 1.0, 0.0);
  glViewport(0, 0, windowWidth, windowHeight);

  /* Orthographic projection */
  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
  gluOrtho2D(0.0, 1.0, -100.0, 0.0);

  /* Leave the model-view matrix selected and reset */
  glMatrixMode(GL_MODELVIEW);
  glLoadIdentity();
  //...
  return 0;
}

最后，我们只需要将上述三个部分组合起来即可实现音频频谱显示。完整的代码如下所示：

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <GL/glut.h>
#include "fftw3.h"
#include "portaudio.h"
/* Capture and analysis configuration for the complete program. */
#define SAMPLE_RATE 44100        /* sample rate passed to Pa_OpenStream() */
#define FRAMES_PER_BUFFER 512    /* capture buffer size in frames; also the FFT length */
#define NUM_SECONDS 5            /* used to size the recording buffer */
#define NUM_CHANNELS 1           /* channel count requested from the input device */

/* Sample type stored in the recording buffer. */
typedef float SAMPLE;

/* Recording state handed to the stream callback through its userData pointer. */
typedef struct paData {
  int frameIndex;           /* index of the next frame to be written */
  int maxFrameIndex;        /* number of frames the buffer can hold */
  SAMPLE *recordedSamples;  /* storage for the recorded samples */
} paData;
/* Sample value stored when PortAudio delivers no input buffer. */
#ifndef SAMPLE_SILENCE
#define SAMPLE_SILENCE (0.0f)
#endif

/*
 * PortAudio stream callback: appends the incoming frames to the recording
 * buffer described by userData (a paData*).
 *
 * inputBuffer     - interleaved input samples, or NULL, in which case silence
 *                   is stored instead
 * framesPerBuffer - number of frames offered by this invocation
 * userData        - the paData recording state registered with Pa_OpenStream()
 *
 * Returns paContinue while the recording buffer still has room after this
 * call's frames, and paComplete once the remaining room is smaller than the
 * offered frame count (the buffer is then filled to capacity).
 * outputBuffer, timeInfo and statusFlags are not used.
 */
static int paCallback(const void *inputBuffer, void *outputBuffer, unsigned long framesPerBuffer, const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags, void *userData)
{
  paData *data = (paData*)userData;
  const SAMPLE *rptr = (const SAMPLE*)inputBuffer;
  SAMPLE *wptr = &data->recordedSamples[data->frameIndex * NUM_CHANNELS];
  unsigned long framesLeft = 0;
  unsigned long framesToCalc;
  unsigned long samplesToCalc;
  unsigned long i;
  int finished;

  (void)outputBuffer;
  (void)timeInfo;
  (void)statusFlags;

  /* Compute the remaining room without letting a negative difference wrap. */
  if (data->maxFrameIndex > data->frameIndex)
    framesLeft = (unsigned long)(data->maxFrameIndex - data->frameIndex);

  if (framesLeft < framesPerBuffer) {
    framesToCalc = framesLeft;
    finished = paComplete;
  } else {
    framesToCalc = framesPerBuffer;
    finished = paContinue;
  }

  /* Frames are interleaved, so one pass over frames * channels samples
     handles any channel count. */
  samplesToCalc = framesToCalc * NUM_CHANNELS;
  if (inputBuffer == NULL) {
    for (i = 0; i < samplesToCalc; i++)
      *wptr++ = SAMPLE_SILENCE;
  } else {
    for (i = 0; i < samplesToCalc; i++)
      *wptr++ = *rptr++;
  }

  data->frameIndex += (int)framesToCalc;
  return finished;
}
void display()
{
  fftw_complex *in, *out;
  fftw_plan p;
  in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * FRAMES_PER_BUFFER);
  out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * FRAMES_PER_BUFFER);
  p = fftw_plan_dft_1d(FRAMES_PER_BUFFER, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
  glColor3f(1.0, 0.0, 0.0);
  glClear(GL_COLOR_BUFFER_BIT);
  glLoadIdentity();
  int i, j;
  for (i = 0; i < data.maxFrameIndex; i += FRAMES_PER_BUFFER) {
    for (j = 0; j < FRAMES_PER_BUFFER; j++) {
      in[j][0] = data.recordedSamples[(i + j) * NUM_CHANNELS];
      in[j][1] = 0.0;
    }
    fftw_execute(p);
    glBegin(GL_LINE_STRIP);
    for (j = 0; j < FRAMES_PER_BUFFER / 2; j++) {
      float x = (float)j / (FRAMES_PER_BUFFER / 2);
      float y = 20.0 * log10(sqrt(out[j][0] * out[j][0] + out[j][1] * out[j][1]));
      glVertex2f(x, y);
    }
    glEnd();
  }
  fftw_destroy_plan(p);
  fftw_free(in);
  fftw_free(out);
  glutSwapBuffers();
}
int main(int argc, char **argv)
{
  PaStreamParameters inputParameters;
  PaStream *stream;
  PaError err;
  paData data;
  int numSamples = NUM_SECONDS * SAMPLE_RATE;
  int numBytes = numSamples * sizeof(SAMPLE);
  data.recordedSamples = (SAMPLE *) malloc(numBytes);
  data.maxFrameIndex = numSamples / NUM_CHANNELS;
  data.frameIndex = 0;
  err = Pa_Initialize();
  if (err != paNoError) return 1;
  inputParameters.device = Pa_GetDefaultInputDevice();
  if (inputParameters.device == paNoDevice) {
    fprintf(stderr,"Error: No default input device.\n");
    goto done;
  }
  inputParameters.channelCount = NUM_CHANNELS;
  inputParameters.sampleFormat = paFloat32;
  inputParameters.suggestedLatency = Pa_GetDeviceInfo(inputParameters.device)->defaultLowInputLatency;
  inputParameters.hostApiSpecificStreamInfo = NULL;
  err = Pa_OpenStream(
       &stream,
       &inputParameters,
       NULL,
       SAMPLE_RATE,
       FRAMES_PER_BUFFER,
       paClipOff,   
       paCallback,
       &data );
  if (err != paNoError) goto done;
  err = Pa_StartStream(stream);
  if (err != paNoError) goto done;
  glutInit(&argc, argv);
  glutInitWindowSize(640, 480);
  glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);
  glutCreateWindow("Audio Spectrum");
  glutDisplayFunc(display);
  glClearColor(1.0, 1.0, 1.0, 0.0);
  glViewport(0, 0, 640, 480);
  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
  gluOrtho2D(0.0, 1.0, -100.0, 0.0);
  glMatrixMode(GL_MODELVIEW);
  glLoadIdentity();
  printf("Waiting for data to be available...\n"); fflush(stdout);
  glutMainLoop();
  err = Pa_StopStream(stream);
  if (err != paNoError) goto done;
  err = Pa_CloseStream(stream);
  if (err != paNoError) goto done;
  Pa_Terminate();
  free(data.recordedSamples);
  return 0;
done:
  fprintf(stderr, "An error occurred while using the portaudio stream\n");
  fprintf(stderr, "Error number: %d\n", err);
  fprintf(stderr, "Error message: %s\n", Pa_GetErrorText(err));
  Pa_Terminate();
  if (data.recordedSamples) free(data.recordedSamples);
  return 1;
}

上述代码实现了读取音频信号、进行傅里叶变换、可视化显示的整个过程。用户可以使用这个代码作为基础，根据自己的需求进行修改和扩展，以便应用到实际的项目中。

上一篇: idea打包java可执行jar包

下一篇: C++实现回文数字符串判断函数

评论区

相似文章