21xrx.com
2024-12-22 17:58:13 Sunday
登录
文章检索 我的文章 写文章
使用C++实现音频频谱显示
2023-07-06 04:19:24 深夜i     --     --
C++ 音频 频谱 显示 实现

音频频谱显示是一种将音频信号转化为可视化图像的技术。这种技术可以帮助用户更加直观地理解音频信号的结构和特征,而且在音频编辑、混音、剪辑等领域也有着广泛的应用。

本文将介绍如何使用C++实现音频频谱显示。我们会使用一个名为FFTW的开源库来对音频信号进行傅里叶变换,然后将变换后的数据进行可视化显示。

首先,我们需要获取音频数据并将其表示为一个数字信号。下面的示例使用一个叫做PortAudio的库,从默认输入设备(例如麦克风)录制一段音频。PortAudio是一个跨平台的音频输入/输出库,它支持多种操作系统,包括Windows、Mac OS和Linux等。需要注意的是,PortAudio本身并不负责解析音频文件;如果要处理已有的音频文件,还需要配合文件解码库(例如libsndfile)先把文件解码为采样数据。

从输入设备录制音频并生成数字信号的代码如下所示:


#include "portaudio.h"

/* Capture configuration shared by the recording and FFT code. */
#define SAMPLE_RATE (44100)      /* sample rate requested from PortAudio, in Hz */
#define FRAMES_PER_BUFFER (512)  /* frames per audio callback; also used as the FFT length */
#define NUM_SECONDS (5)          /* length of the recording buffer, in seconds */
#define NUM_CHANNELS (1)         /* interleaved channels per frame */

/* Sample type of the recording buffer; it has to agree with the paFloat32
 * stream format requested when the stream is opened. */
typedef float SAMPLE;

/* Value written to the recording buffer when the callback gets no input.
 * The callback refers to this name, which was previously never defined. */
#define SAMPLE_SILENCE (0.0f)

/* State handed to the audio callback through its userData pointer. */
typedef struct {
  int     frameIndex;        /* next frame to be written into recordedSamples */
  int     maxFrameIndex;     /* capacity of recordedSamples, in frames */
  SAMPLE  *recordedSamples;  /* interleaved sample storage, owned by main() */
} paData;

/*
 * PortAudio stream callback: appends the captured frames to the recording
 * buffer described by userData (a paData *).
 *
 * inputBuffer      interleaved SAMPLE input, or NULL when no input is available
 * framesPerBuffer  number of frames offered by this call
 * userData         the paData whose buffer is being filled
 *
 * outputBuffer, timeInfo and statusFlags are not used.
 *
 * Returns paContinue while the offered frames fit into the remaining space,
 * and paComplete once fewer than framesPerBuffer frames of space remain.
 */
static int paCallback(const void *inputBuffer, void *outputBuffer,
                      unsigned long framesPerBuffer,
                      const PaStreamCallbackTimeInfo *timeInfo,
                      PaStreamCallbackFlags statusFlags, void *userData)
{
  paData *data = (paData*)userData;
  const SAMPLE *rptr = (const SAMPLE*)inputBuffer;
  SAMPLE *wptr = &data->recordedSamples[data->frameIndex * NUM_CHANNELS];
  unsigned long framesLeft;
  unsigned long framesToCalc;
  unsigned long samplesToCalc;
  unsigned long i;
  int finished;

  (void)outputBuffer;
  (void)timeInfo;
  (void)statusFlags;

  /* Never let a stale or overshooting index turn into a huge unsigned count. */
  if (data->frameIndex < data->maxFrameIndex) {
    framesLeft = (unsigned long)(data->maxFrameIndex - data->frameIndex);
  } else {
    framesLeft = 0;
  }

  /* Both assignments belong to the same branch, so the braces are required. */
  if (framesLeft < framesPerBuffer) {
    framesToCalc = framesLeft;
    finished = paComplete;
  } else {
    framesToCalc = framesPerBuffer;
    finished = paContinue;
  }

  /* Frames are interleaved, so one flat copy covers any channel count. */
  samplesToCalc = framesToCalc * NUM_CHANNELS;

  if (inputBuffer == NULL) {
    /* No input this round: record silence so the timeline stays continuous. */
    for (i = 0; i < samplesToCalc; i++) {
      *wptr++ = (SAMPLE)0;
    }
  } else {
    for (i = 0; i < samplesToCalc; i++) {
      *wptr++ = *rptr++;
    }
  }

  data->frameIndex += (int)framesToCalc;

  return finished;
}

int main()

{

  PaStreamParameters inputParameters;

  PaStream *stream;

  PaError err;

  paData data;

  int numSamples = NUM_SECONDS * SAMPLE_RATE;

  int numBytes = numSamples * sizeof(SAMPLE);

  data.recordedSamples = (SAMPLE *) malloc(numBytes);

  data.maxFrameIndex = numSamples / NUM_CHANNELS;

  data.frameIndex = 0;

  err = Pa_Initialize();

  if (err != paNoError) return 1;

  inputParameters.device = Pa_GetDefaultInputDevice();

  if (inputParameters.device == paNoDevice) {

    fprintf(stderr,"Error: No default input device.\n");

    goto done;

  }

  inputParameters.channelCount = NUM_CHANNELS;

  inputParameters.sampleFormat = paFloat32;

  inputParameters.suggestedLatency = Pa_GetDeviceInfo(inputParameters.device)->defaultLowInputLatency;

  inputParameters.hostApiSpecificStreamInfo = NULL;

  err = Pa_OpenStream(

       &stream,

       &inputParameters,

       NULL,

       SAMPLE_RATE,

       FRAMES_PER_BUFFER,

       paClipOff,   

       paCallback,

       &data );

  if (err != paNoError) goto done;

  err = Pa_StartStream(stream);

  if (err != paNoError) goto done;

  printf("Waiting for data to be available...\n"); fflush(stdout);

  while(1) {

    Pa_Sleep(1000);

    if (data.frameIndex >= data.maxFrameIndex) break;

  }

  err = Pa_StopStream(stream);

  if (err != paNoError) goto done;

  err = Pa_CloseStream(stream);

  if (err != paNoError) goto done;

  Pa_Terminate();

  return 0;

done:

  fprintf(stderr, "An error occurred while using the portaudio stream\n");

  fprintf(stderr, "Error number: %d\n", err);

  fprintf(stderr, "Error message: %s\n", Pa_GetErrorText(err));

  Pa_Terminate();

  if (data.recordedSamples) free(data.recordedSamples);

  return 1;

}

读取音频信号后,我们需要对其进行傅里叶变换,以便将时域信号转化为频域信号。这个过程可以使用FFTW库完成。FFTW是一个高效的、跨平台的傅里叶变换库,它支持多线程并行计算,并且会在创建变换计划(plan)时针对变换长度和运行平台选择合适的算法。

进行傅里叶变换的代码如下所示:


#include "fftw3.h"

/*
 * Fragment: runs a forward complex FFT over the recording, one block of
 * FRAMES_PER_BUFFER frames at a time. `data` is the paData filled by the
 * recording step; the elided parts are marked with "...".
 */
int main()
{
  //...

  fftw_complex *in, *out;
  fftw_plan p;
  int i, j;

  in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * FRAMES_PER_BUFFER);
  out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * FRAMES_PER_BUFFER);
  if (in == NULL || out == NULL) {
    if (in != NULL) fftw_free(in);
    if (out != NULL) fftw_free(out);
    return 1;
  }

  /* One plan is created up front and reused for every block. */
  p = fftw_plan_dft_1d(FRAMES_PER_BUFFER, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
  if (p == NULL) {
    fftw_free(in);
    fftw_free(out);
    return 1;
  }

  for (i = 0; i < data.maxFrameIndex; i += FRAMES_PER_BUFFER) {
    for (j = 0; j < FRAMES_PER_BUFFER; j++) {
      int frame = i + j;

      /* The recording length is not a multiple of the block size, so the
       * last block is zero-padded instead of reading past the buffer. */
      if (frame < data.maxFrameIndex) {
        in[j][0] = data.recordedSamples[frame * NUM_CHANNELS];  /* first channel only */
      } else {
        in[j][0] = 0.0;
      }
      in[j][1] = 0.0;   /* real-valued input: imaginary part is zero */
    }

    fftw_execute(p);

    // ...
  }

  fftw_destroy_plan(p);
  fftw_free(in);
  fftw_free(out);

  //...

  return 0;
}

傅里叶变换完成后,我们需要对变换结果进行可视化显示。这个过程可以使用OpenGL来完成。OpenGL是一个跨平台的图形库,它可以在多种操作系统和硬件平台上运行,包括Windows、Mac OS和Linux等。

可视化显示的代码如下所示:


#include <GL/glut.h>

/*
 * Fragment: GLUT display callback that draws the lower half of the FFT
 * output `out` as a line strip. x runs over [0, 1), y is the bin level in dB.
 */
void display()
{
  //...

  /* A real sinusoid of amplitude A yields a bin magnitude of A * N / 2, so
   * this scale maps a full-scale tone to 0 dB, the top of the
   * gluOrtho2D(0, 1, -100, 0) window used by this article. */
  const double scale = 2.0 / FRAMES_PER_BUFFER;
  int i;

  glBegin(GL_LINE_STRIP);
  for (i = 0; i < FRAMES_PER_BUFFER / 2; i++) {
    float x = (float)i / (FRAMES_PER_BUFFER / 2);
    double magnitude = sqrt(out[i][0] * out[i][0] + out[i][1] * out[i][1]) * scale;
    float y;

    /* log10(0) is -infinity; clamp silent bins to the -100 dB floor
     * (1e-5 corresponds to exactly -100 dB). */
    if (magnitude > 1e-5) {
      y = (float)(20.0 * log10(magnitude));
    } else {
      y = -100.0f;
    }

    glVertex2f(x, y);
  }
  glEnd();

  glutSwapBuffers();
}

/*
 * Fragment: creates the GLUT window and sets up a 2D projection in which
 * x spans [0, 1] and y spans [-100, 0]. The elided parts are marked "...".
 */
int main(int argc, char **argv)
{
  enum { WINDOW_WIDTH = 640, WINDOW_HEIGHT = 480 };

  //...

  /* Window creation and callback registration. */
  glutInit(&argc, argv);
  glutInitWindowSize(WINDOW_WIDTH, WINDOW_HEIGHT);
  glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);
  glutCreateWindow("Audio Spectrum");
  glutDisplayFunc(display);

  /* White background, viewport covering the whole window. */
  glViewport(0, 0, WINDOW_WIDTH, WINDOW_HEIGHT);
  glClearColor(1.0, 1.0, 1.0, 0.0);

  /* Orthographic projection matching the plot's coordinate ranges. */
  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
  gluOrtho2D(0.0, 1.0, -100.0, 0.0);

  /* Leave the model-view matrix selected and reset for drawing. */
  glMatrixMode(GL_MODELVIEW);
  glLoadIdentity();

  //...

  return 0;
}

最后,我们只需要将上述三个部分组合起来即可实现音频频谱显示。完整的代码如下所示:


#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <GL/glut.h>

#include "fftw3.h"
#include "portaudio.h"

/* Capture configuration shared by the recording, FFT and drawing code. */
#define SAMPLE_RATE (44100)      /* sample rate requested from PortAudio, in Hz */
#define FRAMES_PER_BUFFER (512)  /* frames per audio callback; also used as the FFT length */
#define NUM_SECONDS (5)          /* length of the recording buffer, in seconds */
#define NUM_CHANNELS (1)         /* interleaved channels per frame */

/* Sample type of the recording buffer; it has to agree with the paFloat32
 * stream format requested when the stream is opened. */
typedef float SAMPLE;

/* Value written to the recording buffer when the callback gets no input.
 * The callback refers to this name, which was previously never defined. */
#define SAMPLE_SILENCE (0.0f)

/* State handed to the audio callback through its userData pointer. */
typedef struct {
  int     frameIndex;        /* next frame to be written into recordedSamples */
  int     maxFrameIndex;     /* capacity of recordedSamples, in frames */
  SAMPLE  *recordedSamples;  /* interleaved sample storage, owned by main() */
} paData;

/*
 * PortAudio stream callback: appends the captured frames to the recording
 * buffer described by userData (a paData *).
 *
 * inputBuffer      interleaved SAMPLE input, or NULL when no input is available
 * framesPerBuffer  number of frames offered by this call
 * userData         the paData whose buffer is being filled
 *
 * outputBuffer, timeInfo and statusFlags are not used.
 *
 * Returns paContinue while the offered frames fit into the remaining space,
 * and paComplete once fewer than framesPerBuffer frames of space remain.
 */
static int paCallback(const void *inputBuffer, void *outputBuffer,
                      unsigned long framesPerBuffer,
                      const PaStreamCallbackTimeInfo *timeInfo,
                      PaStreamCallbackFlags statusFlags, void *userData)
{
  paData *data = (paData*)userData;
  const SAMPLE *rptr = (const SAMPLE*)inputBuffer;
  SAMPLE *wptr = &data->recordedSamples[data->frameIndex * NUM_CHANNELS];
  unsigned long framesLeft;
  unsigned long framesToCalc;
  unsigned long samplesToCalc;
  unsigned long i;
  int finished;

  (void)outputBuffer;
  (void)timeInfo;
  (void)statusFlags;

  /* Never let a stale or overshooting index turn into a huge unsigned count. */
  if (data->frameIndex < data->maxFrameIndex) {
    framesLeft = (unsigned long)(data->maxFrameIndex - data->frameIndex);
  } else {
    framesLeft = 0;
  }

  /* Both assignments belong to the same branch, so the braces are required. */
  if (framesLeft < framesPerBuffer) {
    framesToCalc = framesLeft;
    finished = paComplete;
  } else {
    framesToCalc = framesPerBuffer;
    finished = paContinue;
  }

  /* Frames are interleaved, so one flat copy covers any channel count. */
  samplesToCalc = framesToCalc * NUM_CHANNELS;

  if (inputBuffer == NULL) {
    /* No input this round: record silence so the timeline stays continuous. */
    for (i = 0; i < samplesToCalc; i++) {
      *wptr++ = (SAMPLE)0;
    }
  } else {
    for (i = 0; i < samplesToCalc; i++) {
      *wptr++ = *rptr++;
    }
  }

  data->frameIndex += (int)framesToCalc;

  return finished;
}

void display()

{

  fftw_complex *in, *out;

  fftw_plan p;

  in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * FRAMES_PER_BUFFER);

  out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * FRAMES_PER_BUFFER);

  p = fftw_plan_dft_1d(FRAMES_PER_BUFFER, in, out, FFTW_FORWARD, FFTW_ESTIMATE);

  glColor3f(1.0, 0.0, 0.0);

  glClear(GL_COLOR_BUFFER_BIT);

  glLoadIdentity();

  int i, j;

  for (i = 0; i < data.maxFrameIndex; i += FRAMES_PER_BUFFER) {

    for (j = 0; j < FRAMES_PER_BUFFER; j++) {

      in[j][0] = data.recordedSamples[(i + j) * NUM_CHANNELS];

      in[j][1] = 0.0;

    }

    fftw_execute(p);

    glBegin(GL_LINE_STRIP);

    for (j = 0; j < FRAMES_PER_BUFFER / 2; j++) {

      float x = (float)j / (FRAMES_PER_BUFFER / 2);

      float y = 20.0 * log10(sqrt(out[j][0] * out[j][0] + out[j][1] * out[j][1]));

      glVertex2f(x, y);

    }

    glEnd();

  }

  fftw_destroy_plan(p);

  fftw_free(in);

  fftw_free(out);

  glutSwapBuffers();

}

int main(int argc, char **argv)

{

  PaStreamParameters inputParameters;

  PaStream *stream;

  PaError err;

  paData data;

  int numSamples = NUM_SECONDS * SAMPLE_RATE;

  int numBytes = numSamples * sizeof(SAMPLE);

  data.recordedSamples = (SAMPLE *) malloc(numBytes);

  data.maxFrameIndex = numSamples / NUM_CHANNELS;

  data.frameIndex = 0;

  err = Pa_Initialize();

  if (err != paNoError) return 1;

  inputParameters.device = Pa_GetDefaultInputDevice();

  if (inputParameters.device == paNoDevice) {

    fprintf(stderr,"Error: No default input device.\n");

    goto done;

  }

  inputParameters.channelCount = NUM_CHANNELS;

  inputParameters.sampleFormat = paFloat32;

  inputParameters.suggestedLatency = Pa_GetDeviceInfo(inputParameters.device)->defaultLowInputLatency;

  inputParameters.hostApiSpecificStreamInfo = NULL;

  err = Pa_OpenStream(

       &stream,

       &inputParameters,

       NULL,

       SAMPLE_RATE,

       FRAMES_PER_BUFFER,

       paClipOff,   

       paCallback,

       &data );

  if (err != paNoError) goto done;

  err = Pa_StartStream(stream);

  if (err != paNoError) goto done;

  glutInit(&argc, argv);

  glutInitWindowSize(640, 480);

  glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);

  glutCreateWindow("Audio Spectrum");

  glutDisplayFunc(display);

  glClearColor(1.0, 1.0, 1.0, 0.0);

  glViewport(0, 0, 640, 480);

  glMatrixMode(GL_PROJECTION);

  glLoadIdentity();

  gluOrtho2D(0.0, 1.0, -100.0, 0.0);

  glMatrixMode(GL_MODELVIEW);

  glLoadIdentity();

  printf("Waiting for data to be available...\n"); fflush(stdout);

  glutMainLoop();

  err = Pa_StopStream(stream);

  if (err != paNoError) goto done;

  err = Pa_CloseStream(stream);

  if (err != paNoError) goto done;

  Pa_Terminate();

  free(data.recordedSamples);

  return 0;

done:

  fprintf(stderr, "An error occurred while using the portaudio stream\n");

  fprintf(stderr, "Error number: %d\n", err);

  fprintf(stderr, "Error message: %s\n", Pa_GetErrorText(err));

  Pa_Terminate();

  if (data.recordedSamples) free(data.recordedSamples);

  return 1;

}

上述代码实现了读取音频信号、进行傅里叶变换、可视化显示的整个过程。用户可以使用这个代码作为基础,根据自己的需求进行修改和扩展,以便应用到实际的项目中。

  
  

评论区

{{item['qq_nickname']}}
()
回复
回复