21xrx.com
2025-04-24 23:31:36 Thursday
文章检索 我的文章 写文章
用Java实现卷积神经网络
2023-06-18 20:05:32 深夜i     90     0
Java 机器学习 卷积神经网络

在机器学习领域中,深度学习是一个非常热门的话题,其中卷积神经网络(Convolutional Neural Networks,简称CNN)是最常用的一种网络模型。本文将介绍如何用Java语言实现卷积神经网络。

首先,我们需要了解CNN的基本结构。CNN由多个层级组成,每个层级执行特定的功能。常用的层级包括卷积层,池化层和全连接层。下面是一个简化的CNN的结构:

![CNN结构](cnn_structure.png)

接下来,我们将用Java实现一个简单的CNN模型,该模型由一个卷积层和一个全连接层组成。我们使用MNIST手写数字数据集进行训练和测试。以下是Java代码:

// Convolution layer: one 2-D kernel per output channel, applied across every
// input depth slice. Kernel shape is [outputDepth][kernelHeight][kernelWidth],
// matching what the constructor (and the caller in main) supplies.
class ConvolutionLayer {
 int[][][] kernels; // kernels[i] = 2-D kernel for output channel i
 int stride; // step between adjacent output positions
 int padding; // zero cells added on every side of the input
 int inputDepth; // number of input channels
 int inputHeight; // input rows
 int inputWidth; // input columns
 int outputDepth; // == kernels.length
 int outputHeight; // derived from the standard conv size formula
 int outputWidth; // derived from the standard conv size formula

 ConvolutionLayer(int[][][] kernels, int stride, int padding, int inputDepth, int inputHeight, int inputWidth) {
  this.kernels = kernels;
  this.stride = stride;
  this.padding = padding;
  this.inputDepth = inputDepth;
  this.inputHeight = inputHeight;
  this.inputWidth = inputWidth;
  this.outputDepth = kernels.length;
  // Standard convolution output size: (in + 2*pad - kernel) / stride + 1.
  this.outputHeight = (inputHeight + 2 * padding - kernels[0].length) / stride + 1;
  this.outputWidth = (inputWidth + 2 * padding - kernels[0][0].length) / stride + 1;
 }

 /**
  * Computes the forward convolution of one input volume.
  *
  * @param inputs input volume, shape [inputDepth][inputHeight][inputWidth]
  * @return output volume, shape [outputDepth][outputHeight][outputWidth]
  */
 public int[][][] forward(int[][][] inputs) {
  int[][][] paddedInputs = pad(inputs, padding);
  int[][][] outputs = new int[outputDepth][outputHeight][outputWidth];
  for (int i = 0; i < outputDepth; i++) {
   for (int j = 0; j < outputHeight; j++) {
    for (int k = 0; k < outputWidth; k++) {
     int value = 0;
     for (int l = 0; l < inputDepth; l++) {
      for (int m = 0; m < kernels[i].length; m++) {
       for (int n = 0; n < kernels[i][0].length; n++) {
        // Fixed: the original referenced an undeclared "padsInputs" and
        // indexed the 3-D kernels array with four subscripts. The kernel
        // for output channel i is shared across input depth slices.
        value += paddedInputs[l][j * stride + m][k * stride + n] * kernels[i][m][n];
       }
      }
     }
     outputs[i][j][k] = value;
    }
   }
  }
  return outputs;
 }

 // Zero-pads each depth slice by p cells on every side.
 private static int[][][] pad(int[][][] inputs, int p) {
  int d = inputs.length;
  int h = inputs[0].length;
  int w = inputs[0][0].length;
  int[][][] padded = new int[d][h + 2 * p][w + 2 * p];
  for (int i = 0; i < d; i++) {
   for (int j = 0; j < h; j++) {
    for (int k = 0; k < w; k++) {
     padded[i][j + p][k + p] = inputs[i][j][k];
    }
   }
  }
  return padded;
 }
}
// Fully connected (dense) layer: outputs = inputs . weights + biases.
class FullyConnectedLayer {
 int inputSize; // length of the input vector
 int outputSize; // length of the output vector
 double[][] weights; // weights[j][i] connects input j to output i
 double[] biases; // one bias per output, initialised to 0

 FullyConnectedLayer(int inputSize, int outputSize) {
  this.inputSize = inputSize;
  this.outputSize = outputSize;
  // Gaussian-initialised weights; biases stay at Java's double default (0.0).
  Random r = new Random();
  this.weights = new double[inputSize][outputSize];
  for (int i = 0; i < inputSize; i++) {
   for (int j = 0; j < outputSize; j++) {
    this.weights[i][j] = r.nextGaussian();
   }
  }
  this.biases = new double[outputSize];
 }

 /**
  * Computes the affine transform of one input vector.
  *
  * @param inputs vector of length inputSize
  * @return vector of length outputSize, where out[i] = sum_j in[j]*w[j][i] + b[i]
  */
 public double[] forward(double[] inputs) {
  double[] outputs = new double[outputSize];
  for (int i = 0; i < outputSize; i++) {
   double value = 0;
   for (int j = 0; j < inputSize; j++) {
    value += inputs[j] * weights[j][i];
   }
   outputs[i] = value + biases[i];
  }
  return outputs;
 }
}
public class CNN {
 ConvolutionLayer convLayer;
 FullyConnectedLayer fcLayer;
 CNN(ConvolutionLayer convLayer, FullyConnectedLayer fcLayer)
  this.convLayer = convLayer;
  this.fcLayer = fcLayer;
 
 public double[] predict(int[][][] image) {
  int[][][] convOutput = convLayer.forward(image);
  double[] flattenedOutput = flatten(convOutput);
  double[] fcOutput = fcLayer.forward(flattenedOutput);
  return fcOutput;
 }
 ...
}
// Trains the model on MNIST and reports per-epoch test accuracy.
// NOTE(review): this method is not enclosed in any class in the visible
// source, and it relies on helpers that are never defined here (Mnist,
// flatten, crossEntropyLoss, subtract, matrixMultiplyTranspose, unflatten,
// argmax, and the layers' backward/update methods) — presumably elided by
// the article. As written it will not compile; treat it as pseudocode.
public static void main(String[] args) throws Exception {
 // Load the MNIST training and test sets (Mnist is not defined in this file).
 Mnist mnist = new Mnist();
 int[][][] trainImages = mnist.getTrainImages();
 int[] trainLabels = mnist.getTrainLabels();
 int[][][] testImages = mnist.getTestImages();
 int[] testLabels = mnist.getTestLabels();
 // Build the model: one conv layer (single 3x3 all-ones kernel, stride 1,
 // padding 1 over a 1x28x28 input) feeding a dense layer with 10 outputs.
 int numClasses = 10;
 int inputDepth = 1;
 int inputHeight = 28;
 int inputWidth = 28;
 ConvolutionLayer convLayer = new ConvolutionLayer(new int[][][]{{{1,1,1},{1,1,1},{1,1,1}}}, 1, 1, inputDepth, inputHeight, inputWidth);
 FullyConnectedLayer fcLayer = new FullyConnectedLayer(convLayer.outputDepth * convLayer.outputHeight * convLayer.outputWidth, numClasses);
 CNN cnn = new CNN(convLayer, fcLayer);
 // Mini-batch SGD hyperparameters; any remainder images beyond a full
 // batch are silently dropped each epoch.
 int batchSize = 128;
 double learningRate = 0.01;
 int numEpochs = 10;
 int numSteps = trainImages.length / batchSize;
 for (int epoch = 0; epoch < numEpochs; epoch++) {
  for (int step = 0; step < numSteps; step++) {
   int[] batchLabels = Arrays.copyOfRange(trainLabels, step * batchSize, (step + 1) * batchSize);
   int[][][] batchImages = Arrays.copyOfRange(trainImages, step * batchSize, (step + 1) * batchSize);
   // One-hot encode the batch labels.
   double[][] batchOutputs = new double[batchSize][numClasses];
   for (int i = 0; i < batchSize; i++) {
    batchOutputs[i][batchLabels[i]] = 1;
   }
   // Forward pass.
   // NOTE(review): convLayer.forward takes a single int[][][] volume and
   // returns int[][][]; passing a whole batch and assigning to
   // double[][][] does not type-check — confirm the intended batch API.
   double[][][] convOutputs = convLayer.forward(batchImages);
   double[] flattenedOutputs = flatten(convOutputs);
   double[] fcOutputs = fcLayer.forward(flattenedOutputs);
   double loss = crossEntropyLoss(fcOutputs, batchOutputs);
   // Backward pass: softmax/cross-entropy gradient, then propagate the
   // deltas back through the dense layer into the conv layer's shape.
   // NOTE(review): subtract receives a double[] and a double[][] — another
   // batch/sample dimension mismatch to confirm.
   double[] fcDeltas = subtract(fcOutputs, batchOutputs);
   double[] flattenedDeltas = matrixMultiplyTranspose(fcDeltas, fcLayer.weights);
   double[][][] convDeltas = unflatten(flattenedDeltas, convLayer.outputDepth, convLayer.outputHeight, convLayer.outputWidth);
   convLayer.backward(batchImages, convOutputs, convDeltas);
   fcLayer.backward(flattenedOutputs, fcDeltas);
   // SGD parameter update.
   convLayer.update(learningRate);
   fcLayer.update(learningRate);
  }
  // Evaluate on the full test set after each epoch.
  int numCorrect = 0;
  for (int i = 0; i < testImages.length; i++) {
   // NOTE(review): testImages[i] is int[][], not int[], and cnn.predict
   // expects int[][][] — the image presumably needs wrapping in a
   // single-channel volume.
   int[] image = testImages[i];
   int label = testLabels[i];
   double[] output = cnn.predict(image);
   int prediction = argmax(output);
   if (prediction == label) {
    numCorrect++;
   }
  }
  double accuracy = (double) numCorrect / testImages.length;
  // Epoch is printed 0-indexed.
  System.out.printf("Epoch %d, test accuracy: %.2f%%\n", epoch, accuracy * 100);
 }
}

在main函数中,我们使用MNIST数据集来训练我们的模型。我们首先加载数据集,然后创建一个CNN对象。我们使用一个包含3x3的卷积核的卷积层和一个全连接层来构建模型。我们使用交叉熵损失函数进行训练,并通过反向传播算法更新网络权重。每个epoch结束后,我们使用测试数据集来评估模型的准确性。

本文演示了如何使用Java实现卷积神经网络,并在MNIST数据集上训练和测试模型。Java不仅在企业应用领域得到广泛应用,也可以用来实现深度学习模型。此外,通过使用Java,我们可以借助Java的强类型和面向对象特性,开发更健壮和可维护的代码。

  
  

评论区