基于Java语言的小型AI案例

goffwo · 发表于 2023-5-25 16:31

下面是一个基于Java语言的小型AI案例，使用朴素贝叶斯算法进行垃圾邮件分类。

代码示例如下：

```java
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * Minimal multinomial Naive Bayes spam filter.
 *
 * <p>Training counts how often each whitespace-separated token occurs in spam
 * and in ham text. Classification compares the Laplace-smoothed log-likelihood
 * of a message under both classes. Class priors are not modelled (no document
 * counts are tracked), so both classes are treated as equally likely a priori.
 */
public class SpamClassifier {
    /** Occurrences of each token in spam training text. */
    private final Map<String, Integer> spamWordsCount;
    /** Occurrences of each token in ham training text. */
    private final Map<String, Integer> hamWordsCount;
    /** Total number of tokens seen in spam training text. */
    private int totalSpamCount;
    /** Total number of tokens seen in ham training text. */
    private int totalHamCount;

    /** Creates an untrained classifier. */
    public SpamClassifier() {
        spamWordsCount = new HashMap<>();
        hamWordsCount = new HashMap<>();
        totalSpamCount = 0;
        totalHamCount = 0;
    }

    /**
     * Adds the tokens of a text file to the model.
     *
     * @param filePath path of the training file; every line is split on whitespace
     * @param label    {@code "spam"} to train the spam class; any other value trains ham
     * @throws IOException if the file cannot be opened or read
     */
    public void train(String filePath, String label) throws IOException {
        boolean isSpam = label.equals("spam");
        Map<String, Integer> counts = isSpam ? spamWordsCount : hamWordsCount;

        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String word : line.trim().split("\\s+")) {
                    if (word.isEmpty()) {
                        // Blank lines yield a single empty token; it is not a word.
                        continue;
                    }
                    counts.merge(word, 1, Integer::sum);
                    if (isSpam) {
                        totalSpamCount++;
                    } else {
                        totalHamCount++;
                    }
                }
            }
        }
    }

    /**
     * Scores a message as spam versus ham.
     *
     * @param message text to classify; split on whitespace
     * @return the log-odds {@code log P(message|spam) - log P(message|ham)}:
     *         a value greater than 0 means spam is more likely, a value less
     *         than or equal to 0 means ham; {@code 0.0} when the model has not
     *         been trained or the message has no tokens
     */
    public double classify(String message) {
        int vocabularySize = vocabularySize();
        if (vocabularySize == 0) {
            // Untrained model: there is no evidence either way (and the
            // smoothed denominators below would be zero).
            return 0.0;
        }

        // Laplace smoothing must use the same vocabulary size for both classes,
        // otherwise unseen words are penalised differently per class.
        double spamDenominator = totalSpamCount + vocabularySize;
        double hamDenominator = totalHamCount + vocabularySize;

        double logOdds = 0.0;
        for (String word : message.trim().split("\\s+")) {
            if (word.isEmpty()) {
                continue;
            }
            int spamWordCount = spamWordsCount.getOrDefault(word, 0);
            int hamWordCount = hamWordsCount.getOrDefault(word, 0);

            logOdds += Math.log((spamWordCount + 1.0) / spamDenominator)
                    - Math.log((hamWordCount + 1.0) / hamDenominator);
        }
        return logOdds;
    }

    /** Returns the number of distinct tokens seen across both classes. */
    private int vocabularySize() {
        int size = spamWordsCount.size();
        for (String word : hamWordsCount.keySet()) {
            if (!spamWordsCount.containsKey(word)) {
                size++;
            }
        }
        return size;
    }

    /**
     * Demo entry point: trains on two sample files and classifies one message.
     *
     * @param args unused
     * @throws IOException if a training file cannot be read
     */
    public static void main(String[] args) throws IOException {
        SpamClassifier classifier = new SpamClassifier();

        // Train on spam samples.
        classifier.train("spam_samples.txt", "spam");

        // Train on non-spam (ham) samples.
        classifier.train("ham_samples.txt", "ham");

        // Message to classify.
        String testMessage = "Get a free gift! Limited time offer!";
        double probability = classifier.classify(testMessage);

        // classify() returns log-odds, so a positive score means spam.
        if (probability > 0) {
            System.out.println("The message is classified as spam.");
        } else {
            System.out.println("The message is classified as ham.");
        }
    }
}
```

在上面的示例中，`train`方法用于训练垃圾邮件和非垃圾邮件样本，通过读取文件中的文本内容，统计每个单词在垃圾邮件和非垃圾邮件中的出现次数。`classify`方法用于对给定的邮件内容进行分类，计算垃圾邮件和非垃圾邮件的概率，并返回概率较高的分类结果。

在`main`方法中，我们首先创建`SpamClassifier`对象，然后使用`train`方法分别训练垃圾邮件和非垃圾邮件样本。接下来，我们使用`classify`方法对一个测试邮件进行分类，并根据概率结果输出分类结果。

请注意，上述代码示例仅为一个简单的垃圾邮件分类器，仅供演示用途。实际的垃圾邮件分类器可能需要更复杂的特征提取和模型训练过程，以及更大规模的数据集来提高分类准确率。

goffwo · 发表于 2023-5-25 16:36

下面是一个基于Java语言的小型AI案例，使用决策树算法进行鸢尾花分类。

代码示例如下：

```java
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * One iris sample: four numeric measurements plus a species label.
 *
 * <p>Besides the individual getters, features can be accessed by name through
 * {@link #getFeatureValue(String)}; the valid names are listed by
 * {@link #getFeatures()}.
 */
class Iris {
    /** Names accepted by {@link #getFeatureValue(String)}, in declaration order. */
    private static final List<String> FEATURES =
            List.of("sepalLength", "sepalWidth", "petalLength", "petalWidth");

    private final double sepalLength;
    private final double sepalWidth;
    private final double petalLength;
    private final double petalWidth;
    private final String species;

    /**
     * @param sepalLength sepal length measurement
     * @param sepalWidth  sepal width measurement
     * @param petalLength petal length measurement
     * @param petalWidth  petal width measurement
     * @param species     species label of this sample
     */
    public Iris(double sepalLength, double sepalWidth, double petalLength, double petalWidth, String species) {
        this.sepalLength = sepalLength;
        this.sepalWidth = sepalWidth;
        this.petalLength = petalLength;
        this.petalWidth = petalWidth;
        this.species = species;
    }

    public double getSepalLength() {
        return sepalLength;
    }

    public double getSepalWidth() {
        return sepalWidth;
    }

    public double getPetalLength() {
        return petalLength;
    }

    public double getPetalWidth() {
        return petalWidth;
    }

    public String getSpecies() {
        return species;
    }

    /**
     * Returns the names of the numeric features of a sample.
     *
     * @return an unmodifiable list of feature names
     */
    public List<String> getFeatures() {
        return FEATURES;
    }

    /**
     * Looks up a measurement by feature name.
     *
     * @param attribute one of the names returned by {@link #getFeatures()}
     * @return the value of that feature for this sample
     * @throws IllegalArgumentException if {@code attribute} is not a known feature name
     */
    public double getFeatureValue(String attribute) {
        switch (attribute) {
            case "sepalLength":
                return sepalLength;
            case "sepalWidth":
                return sepalWidth;
            case "petalLength":
                return petalLength;
            case "petalWidth":
                return petalWidth;
            default:
                throw new IllegalArgumentException("Unknown feature: " + attribute);
        }
    }
}

/**
 * A trained decision tree, represented by its root node.
 */
class DecisionTree {
    /** Entry point of the tree; every classification starts here. */
    private final Node rootNode;

    /**
     * @param root the root node of an already-built tree
     */
    public DecisionTree(Node root) {
        rootNode = root;
    }

    /**
     * Predicts the species of a sample by delegating to the root node.
     *
     * @param iris the sample to classify
     * @return the species predicted by the tree
     */
    public String classify(Iris iris) {
        String predicted = rootNode.predict(iris);
        return predicted;
    }
}

/**
 * Common supertype of all decision-tree nodes.
 */
abstract class Node {
    /**
     * Predicts the species for a sample from this node downward.
     *
     * @param iris the sample to classify
     * @return the predicted species
     */
    public abstract String predict(Iris iris);
}

/**
 * Terminal node of the tree: always answers with one fixed species.
 */
class LeafNode extends Node {
    /** Species returned for every sample that reaches this leaf. */
    private final String label;

    /**
     * @param species the species this leaf predicts
     */
    public LeafNode(String species) {
        label = species;
    }

    /**
     * Returns this leaf's species; the sample itself is not inspected.
     */
    @Override
    public String predict(Iris iris) {
        return label;
    }
}

/**
 * Decision node: compares one feature of the sample against a threshold and
 * forwards the sample to the left or right subtree.
 */
class InternalNode extends Node {
    /** Name of the feature tested at this node. */
    private final String attribute;
    /** Samples with a feature value {@code <= threshold} go left, others right. */
    private final double threshold;
    private final Node leftChild;
    private final Node rightChild;

    /**
     * @param attribute  name of the feature to test
     * @param threshold  split point for that feature
     * @param leftChild  subtree for samples whose value is {@code <= threshold}
     * @param rightChild subtree for samples whose value is {@code > threshold}
     */
    public InternalNode(String attribute, double threshold, Node leftChild, Node rightChild) {
        this.attribute = attribute;
        this.threshold = threshold;
        this.leftChild = leftChild;
        this.rightChild = rightChild;
    }

    /**
     * Routes the sample to the matching child and returns that child's prediction.
     *
     * <p>The prediction depends only on the sample's feature values; the
     * sample's own species label is deliberately never consulted, so labelled
     * samples can be used to evaluate the tree.
     */
    @Override
    public String predict(Iris iris) {
        Node next = iris.getFeatureValue(attribute) <= threshold ? leftChild : rightChild;
        return next.predict(iris);
    }
}

class DecisionTreeBuilder {
public static DecisionTree buildTree(List<Iris> dataset) {
      return buildTreeRecursive(dataset, new ArrayList<>(dataset.get(0).getFeatures()));
}

private static DecisionTree buildTreeRecursive(List<Iris> dataset, List<String> features) {
      if (isHomogeneous(dataset)) {
         return new DecisionTree(new LeafNode(dataset.get(0).getSpecies()));
      }

      if (features.isEmpty()) {
         String majoritySpecies = getMajoritySpecies(dataset);
         return new DecisionTree(new LeafNode(majoritySpecies));
      }

      Split bestSplit = findBestSplit(dataset, features);
      features.remove(bestSplit.getAttribute());

      List<Iris> leftSubset = new ArrayList<>();
      List<Iris> rightSubset = new ArrayList<>();
      for (Iris iris : dataset) {
         if (iris.getFeatureValue(bestSplit.getAttribute()) <= bestSplit.getThreshold()) {
            leftSubset.add(iris);
         } else {
            rightSubset.add(iris);
         }
      }

      Node leftChild = buildTreeRecursive(leftSubset, new ArrayList<>(features));
      Node rightChild = buildTreeRecursive(rightSubset, new ArrayList<>(features));

      return new DecisionTree(new InternalNode(best

Split.getAttribute(), bestSplit.getThreshold(), leftChild, rightChild));
}

private static boolean isHomogeneous(List<Iris> dataset) {
      String species = dataset.get(0).getSpecies();
      for (Iris iris : dataset) {
         if (!iris.getSpecies().equals(species)) {
            return false;
         }
      }
      return true;
}

private static String getMajoritySpecies(List<Iris> dataset) {
      int[] speciesCounts = new int[3];
      for (Iris iris : dataset) {
         switch (iris.getSpecies()) {
            case "setosa":
                  speciesCounts[0]++;
                  break;
            case "versicolor":
                  speciesCounts[1]++;
                  break;
            case "virginica":
                  speciesCounts[2]++;
                  break;
         }
      }

      int maxCountIndex = 0;
      for (int i = 1; i < speciesCounts.length; i++) {
         if (speciesCounts[i] > speciesCounts[maxCountIndex]) {
            maxCountIndex = i;
         }
      }

      switch (maxCountIndex) {
         case 0:
            return "setosa";
         case 1:
            return "versicolor";
         default:
            return "virginica";
      }
}

private static Split findBestSplit(List<Iris> dataset, List<String> features) {
      double bestGiniIndex = Double.MAX_VALUE;
      String bestAttribute = "";
      double bestThreshold = 0.0;

      for (String attribute : features) {
         List<Double> featureValues = new ArrayList<>();
         for (Iris iris : dataset) {
            featureValues.add(iris.getFeatureValue(attribute));
         }
         featureValues.sort(null);

         for (int i = 1; i < featureValues.size(); i++) {
            double threshold = (featureValues.get(i - 1) + featureValues.get(i)) / 2.0;
            Split split = new Split(attribute, threshold);
            double giniIndex = calculateGiniIndex(dataset, split);
            if (giniIndex < bestGiniIndex) {
                  bestGiniIndex = giniIndex;
                  bestAttribute = attribute;
                  bestThreshold = threshold;
            }
         }
      }

      return new Split(bestAttribute, bestThreshold);
}

private static double calculateGiniIndex(List<Iris> dataset, Split split) {
      List<Iris> leftSubset = new ArrayList<>();
      List<Iris> rightSubset = new ArrayList<>();
      for (Iris iris : dataset) {
         if (iris.getFeatureValue(split.getAttribute()) <= split.getThreshold()) {
            leftSubset.add(iris);
         } else {
            rightSubset.add(iris);
         }
      }

      double leftSubsetWeight = (double) leftSubset.size() / dataset.size();
      double rightSubsetWeight = (double) rightSubset.size() / dataset.size();

      double giniIndex = 0.0;
      for (List<Iris> subset : List.of(leftSubset, rightSubset)) {
         if (subset.isEmpty()) {
            continue;
         }

         double[] speciesCounts = new double[3];
         for (Iris iris : subset) {
            switch (iris.getSpecies()) {
                  case "setosa":
                     speciesCounts[0]++;
                     break;
                  case "versicolor":
                     speciesCounts[1]++;
                     break;
                  case "virginica":
                     speciesCounts[2]++;
                     break;
            }
         }

         double impurity = 1.0;
         for (double count : speciesCounts) {
            double probability = count / subset.size();
            impurity -= Math.pow(probability, 2);
         }

         if (subset == leftSubset) {
            giniIndex += leftSubsetWeight * impurity;
         } else {
            giniIndex += rightSubsetWeight * impurity;
         }
      }

      return giniIndex;
}
}

/**
 * Demo entry point: trains a decision tree on one CSV file and prints the
 * prediction for every row of a second CSV file.
 *
 * <p>Each data row is {@code sepalLength,sepalWidth,petalLength,petalWidth[,species]}.
 * The species column is required in the training file and optional in the test file.
 */
public class IrisClassifier {
    /** Label given to samples whose species is not known. */
    private static final String UNKNOWN_SPECIES = "unknown";

    /**
     * @param args unused
     * @throws IOException if either data file cannot be read
     */
    public static void main(String[] args) throws IOException {
        List<Iris> trainingData = loadTrainingData("iris_training_data.txt");

        DecisionTree decisionTree = DecisionTreeBuilder.buildTree(trainingData);

        List<Iris> testData = loadTestData("iris_test_data.txt");

        for (Iris iris : testData) {
            // Classify an unlabeled copy so the prediction can only depend on
            // the measurements, while the original keeps the label for display.
            Iris unlabeled = new Iris(iris.getSepalLength(), iris.getSepalWidth(),
                    iris.getPetalLength(), iris.getPetalWidth(), UNKNOWN_SPECIES);
            String predictedSpecies = decisionTree.classify(unlabeled);
            System.out.println("Predicted species: " + predictedSpecies + ", Actual species: " + iris.getSpecies());
        }
    }

    /**
     * Reads labelled samples; every non-blank line must have five columns.
     *
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a line has fewer than five columns
     */
    private static List<Iris> loadTrainingData(String filePath) throws IOException {
        return readDataset(filePath, true);
    }

    /**
     * Reads test samples; the species column is kept when present, otherwise
     * the sample is labelled {@code "unknown"}.
     *
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a line has fewer than four columns
     */
    private static List<Iris> loadTestData(String filePath) throws IOException {
        return readDataset(filePath, false);
    }

    /** Parses every non-blank line of the file into an {@link Iris}. */
    private static List<Iris> readDataset(String filePath, boolean requireSpecies) throws IOException {
        List<Iris> dataset = new ArrayList<>();
        // try-with-resources closes the reader even if parsing a line fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    // Tolerate blank lines such as a trailing newline at end of file.
                    continue;
                }
                dataset.add(parseIris(line, requireSpecies));
            }
        }
        return dataset;
    }

    /** Converts one comma-separated line into an {@link Iris}. */
    private static Iris parseIris(String line, boolean requireSpecies) {
        String[] values = line.split(",");
        int requiredColumns = requireSpecies ? 5 : 4;
        if (values.length < requiredColumns) {
            throw new IllegalArgumentException(
                    "Expected at least " + requiredColumns + " comma-separated values but got: " + line);
        }
        double sepalLength = Double.parseDouble(values[0].trim());
        double sepalWidth = Double.parseDouble(values[1].trim());
        double petalLength = Double.parseDouble(values[2].trim());
        double petalWidth = Double.parseDouble(values[3].trim());
        String species = values.length > 4 ? values[4].trim() : UNKNOWN_SPECIES;
        return new Iris(sepalLength, sepalWidth, petalLength, petalWidth, species);
    }
}
```

在上述示例中，我们定义了`Iris`类来表示鸢尾花的属性和类别，并定义了`DecisionTree`类来表示决策树模型。`Node`是一个抽象类，它有两个子类，`LeafNode`表示叶节点，`InternalNode`表示内部节点。`DecisionTreeBuilder`类负责构建决策树模型。`IrisClassifier`类是程序的入口点，加载训练数据和测试数据，并使用构建的决策树对测试数据进行分类预测。

要运行此代码，你需要创建两个数据文件：`iris_training_data.txt`和`iris_test_data.txt`，分别包含用于训练和测试的鸢尾花数据。每行数据应包含四个特征值（花萼长度、花萼宽度、花瓣长度和花瓣宽度）和一个类别值（setosa、versicolor或virginica），用逗号分隔。

此示例仅仅是一个简单的鸢尾花分类器，仅供演示目的。在实际应用中，你可能需要更复杂的特征工程和更大规模的数据集来构建准确的分类模型。

Dlan · 发表于 2023-5-25 17:51

chatgpt写的吧

dadaliya · 发表于 2023-5-25 22:55

小白学习中

52pojie1102 · 发表于 2023-5-25 23:22

注解都没，铁定不是自己的了

pjy612 · 发表于 2023-5-26 01:34

想到了那个复读机ai

2568 · 发表于 2023-5-26 10:28

没有注释看不懂啊

三滑稽甲苯 · 发表于 2023-5-26 11:52

疑源dz, 鉴定为：chatGPT
(好歹把代码放到代码框里再发吧

帐号		自动登录	找回密码
密码			注册[Register]

[学习记录] 基于Java语言的小型AI案例

免费评分