好友
阅读权限10
听众
最后登录1970-1-1
|
楼主|
goffwo
发表于 2023-5-25 16:36
下面是一个基于Java语言的小型AI案例,使用决策树算法进行鸢尾花分类。
代码示例如下:
```java
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
 * A single iris sample: four numeric measurements plus a species label.
 *
 * <p>Measurements can be read through the individual getters or looked up by
 * feature name with {@link #getFeatureValue(String)}; the accepted names are
 * listed by {@link #getFeatures()}.
 */
class Iris {
    /** Feature names accepted by getFeatureValue, in constructor-argument order. */
    private static final List<String> FEATURES =
            List.of("sepalLength", "sepalWidth", "petalLength", "petalWidth");

    private final double sepalLength;
    private final double sepalWidth;
    private final double petalLength;
    private final double petalWidth;
    private final String species;

    /**
     * Creates a sample from its four measurements and its species label.
     *
     * @param sepalLength sepal length measurement
     * @param sepalWidth sepal width measurement
     * @param petalLength petal length measurement
     * @param petalWidth petal width measurement
     * @param species species label of the sample
     */
    public Iris(double sepalLength, double sepalWidth, double petalLength, double petalWidth, String species) {
        this.sepalLength = sepalLength;
        this.sepalWidth = sepalWidth;
        this.petalLength = petalLength;
        this.petalWidth = petalWidth;
        this.species = species;
    }

    /** @return the sepal length measurement */
    public double getSepalLength() {
        return sepalLength;
    }

    /** @return the sepal width measurement */
    public double getSepalWidth() {
        return sepalWidth;
    }

    /** @return the petal length measurement */
    public double getPetalLength() {
        return petalLength;
    }

    /** @return the petal width measurement */
    public double getPetalWidth() {
        return petalWidth;
    }

    /** @return the species label passed to the constructor */
    public String getSpecies() {
        return species;
    }

    /**
     * Returns the names of the numeric features of a sample.
     *
     * @return an unmodifiable list of the names accepted by {@link #getFeatureValue(String)}
     */
    public List<String> getFeatures() {
        return FEATURES;
    }

    /**
     * Looks up a measurement by feature name.
     *
     * @param attribute one of the names returned by {@link #getFeatures()}
     * @return the value of the requested measurement
     * @throws IllegalArgumentException if {@code attribute} is not a known feature name
     */
    public double getFeatureValue(String attribute) {
        if (attribute == null) {
            // A switch on a null String would throw NullPointerException; report it uniformly instead.
            throw new IllegalArgumentException("Unknown feature: null");
        }
        switch (attribute) {
            case "sepalLength":
                return sepalLength;
            case "sepalWidth":
                return sepalWidth;
            case "petalLength":
                return petalLength;
            case "petalWidth":
                return petalWidth;
            default:
                throw new IllegalArgumentException("Unknown feature: " + attribute);
        }
    }
}
/** A trained classifier: holds the root node of a tree and delegates predictions to it. */
class DecisionTree {
    private final Node rootNode;

    /**
     * Wraps an already-built tree.
     *
     * @param root the node that every classification starts from
     */
    public DecisionTree(Node root) {
        rootNode = root;
    }

    /**
     * Classifies one sample.
     *
     * @param iris the sample to classify
     * @return whatever label the root node predicts for {@code iris}
     */
    public String classify(Iris iris) {
        String label = rootNode.predict(iris);
        return label;
    }
}
/** Common base type of the nodes that make up a decision tree. */
abstract class Node {

    /**
     * Predicts a species label for a sample.
     *
     * @param iris the sample to classify
     * @return the predicted species label
     */
    public abstract String predict(Iris iris);
}
/** Terminal tree node that answers every query with one fixed species label. */
class LeafNode extends Node {
    private final String label;

    /**
     * Creates a leaf.
     *
     * @param species the label this leaf returns from {@link #predict(Iris)}
     */
    public LeafNode(String species) {
        label = species;
    }

    /**
     * Returns this leaf's label; the sample itself is not inspected.
     *
     * @param iris the sample being classified (unused)
     * @return the label given to the constructor
     */
    @Override
    public String predict(Iris iris) {
        return label;
    }
}
/**
 * Decision node that routes a sample to one of two children by comparing a single
 * feature value against a threshold.
 */
class InternalNode extends Node {
    private final String attribute;
    private final double threshold;
    private final Node leftChild;
    private final Node rightChild;

    /**
     * Creates a decision node.
     *
     * @param attribute name of the feature to test, as understood by {@code Iris.getFeatureValue}
     * @param threshold samples whose feature value is less than or equal to this go left
     * @param leftChild subtree for samples with feature value {@code <= threshold}
     * @param rightChild subtree for samples with feature value {@code > threshold}
     */
    public InternalNode(String attribute, double threshold, Node leftChild, Node rightChild) {
        this.attribute = attribute;
        this.threshold = threshold;
        this.leftChild = leftChild;
        this.rightChild = rightChild;
    }

    /**
     * Routes the sample to the matching child and returns that child's prediction.
     *
     * <p>The decision uses only the tested feature value. The sample's own species
     * label is deliberately never consulted: returning it would make every labelled
     * sample look correctly classified regardless of what the tree learned.
     *
     * @param iris the sample to classify
     * @return the label predicted by the selected subtree
     */
    @Override
    public String predict(Iris iris) {
        Node next = iris.getFeatureValue(attribute) <= threshold ? leftChild : rightChild;
        return next.predict(iris);
    }
}
class DecisionTreeBuilder {
public static DecisionTree buildTree(List<Iris> dataset) {
return buildTreeRecursive(dataset, new ArrayList<>(dataset.get(0).getFeatures()));
}
private static DecisionTree buildTreeRecursive(List<Iris> dataset, List<String> features) {
if (isHomogeneous(dataset)) {
return new DecisionTree(new LeafNode(dataset.get(0).getSpecies()));
}
if (features.isEmpty()) {
String majoritySpecies = getMajoritySpecies(dataset);
return new DecisionTree(new LeafNode(majoritySpecies));
}
Split bestSplit = findBestSplit(dataset, features);
features.remove(bestSplit.getAttribute());
List<Iris> leftSubset = new ArrayList<>();
List<Iris> rightSubset = new ArrayList<>();
for (Iris iris : dataset) {
if (iris.getFeatureValue(bestSplit.getAttribute()) <= bestSplit.getThreshold()) {
leftSubset.add(iris);
} else {
rightSubset.add(iris);
}
}
Node leftChild = buildTreeRecursive(leftSubset, new ArrayList<>(features));
Node rightChild = buildTreeRecursive(rightSubset, new ArrayList<>(features));
return new DecisionTree(new InternalNode(best
Split.getAttribute(), bestSplit.getThreshold(), leftChild, rightChild));
}
private static boolean isHomogeneous(List<Iris> dataset) {
String species = dataset.get(0).getSpecies();
for (Iris iris : dataset) {
if (!iris.getSpecies().equals(species)) {
return false;
}
}
return true;
}
private static String getMajoritySpecies(List<Iris> dataset) {
int[] speciesCounts = new int[3];
for (Iris iris : dataset) {
switch (iris.getSpecies()) {
case "setosa":
speciesCounts[0]++;
break;
case "versicolor":
speciesCounts[1]++;
break;
case "virginica":
speciesCounts[2]++;
break;
}
}
int maxCountIndex = 0;
for (int i = 1; i < speciesCounts.length; i++) {
if (speciesCounts[i] > speciesCounts[maxCountIndex]) {
maxCountIndex = i;
}
}
switch (maxCountIndex) {
case 0:
return "setosa";
case 1:
return "versicolor";
default:
return "virginica";
}
}
private static Split findBestSplit(List<Iris> dataset, List<String> features) {
double bestGiniIndex = Double.MAX_VALUE;
String bestAttribute = "";
double bestThreshold = 0.0;
for (String attribute : features) {
List<Double> featureValues = new ArrayList<>();
for (Iris iris : dataset) {
featureValues.add(iris.getFeatureValue(attribute));
}
featureValues.sort(null);
for (int i = 1; i < featureValues.size(); i++) {
double threshold = (featureValues.get(i - 1) + featureValues.get(i)) / 2.0;
Split split = new Split(attribute, threshold);
double giniIndex = calculateGiniIndex(dataset, split);
if (giniIndex < bestGiniIndex) {
bestGiniIndex = giniIndex;
bestAttribute = attribute;
bestThreshold = threshold;
}
}
}
return new Split(bestAttribute, bestThreshold);
}
private static double calculateGiniIndex(List<Iris> dataset, Split split) {
List<Iris> leftSubset = new ArrayList<>();
List<Iris> rightSubset = new ArrayList<>();
for (Iris iris : dataset) {
if (iris.getFeatureValue(split.getAttribute()) <= split.getThreshold()) {
leftSubset.add(iris);
} else {
rightSubset.add(iris);
}
}
double leftSubsetWeight = (double) leftSubset.size() / dataset.size();
double rightSubsetWeight = (double) rightSubset.size() / dataset.size();
double giniIndex = 0.0;
for (List<Iris> subset : List.of(leftSubset, rightSubset)) {
if (subset.isEmpty()) {
continue;
}
double[] speciesCounts = new double[3];
for (Iris iris : subset) {
switch (iris.getSpecies()) {
case "setosa":
speciesCounts[0]++;
break;
case "versicolor":
speciesCounts[1]++;
break;
case "virginica":
speciesCounts[2]++;
break;
}
}
double impurity = 1.0;
for (double count : speciesCounts) {
double probability = count / subset.size();
impurity -= Math.pow(probability, 2);
}
if (subset == leftSubset) {
giniIndex += leftSubsetWeight * impurity;
} else {
giniIndex += rightSubsetWeight * impurity;
}
}
return giniIndex;
}
}
/**
 * Program entry point: trains a decision tree from {@code iris_training_data.txt} and
 * prints a prediction for every sample in {@code iris_test_data.txt}.
 *
 * <p>Both files hold one sample per line as comma-separated values: four numeric
 * measurements followed by a species label. The label is mandatory in the training
 * file and optional in the test file.
 */
public class IrisClassifier {
    /** Label used for samples whose species is not available. */
    private static final String UNKNOWN_SPECIES = "unknown";

    /** Number of numeric columns at the start of every data line. */
    private static final int FEATURE_COUNT = 4;

    /**
     * Trains on the training file, then prints predicted and actual species for each test sample.
     *
     * @param args ignored
     * @throws IOException if a data file cannot be read or contains a malformed line
     */
    public static void main(String[] args) throws IOException {
        List<Iris> trainingData = loadTrainingData("iris_training_data.txt");
        DecisionTree decisionTree = DecisionTreeBuilder.buildTree(trainingData);
        List<Iris> testData = loadTestData("iris_test_data.txt");
        for (Iris iris : testData) {
            // Classify a copy without the label so the prediction cannot depend on the true class.
            Iris unlabelled = new Iris(iris.getSepalLength(), iris.getSepalWidth(),
                    iris.getPetalLength(), iris.getPetalWidth(), UNKNOWN_SPECIES);
            String predictedSpecies = decisionTree.classify(unlabelled);
            System.out.println("Predicted species: " + predictedSpecies + ", Actual species: " + iris.getSpecies());
        }
    }

    /**
     * Loads labelled samples; every non-blank line must have four numbers and a species.
     *
     * @param filePath path of the training file
     * @return the samples in file order
     * @throws IOException if the file cannot be read or a line is malformed
     */
    private static List<Iris> loadTrainingData(String filePath) throws IOException {
        return loadData(filePath, true);
    }

    /**
     * Loads test samples; a line without a species column gets the label {@code "unknown"}.
     *
     * @param filePath path of the test file
     * @return the samples in file order
     * @throws IOException if the file cannot be read or a line is malformed
     */
    private static List<Iris> loadTestData(String filePath) throws IOException {
        return loadData(filePath, false);
    }

    /**
     * Parses a comma-separated data file into samples, skipping blank lines.
     *
     * @param filePath path of the file to read
     * @param labelRequired whether a line without a species column is an error
     * @return the samples in file order
     * @throws IOException if the file cannot be read, a line has too few columns,
     *     or a measurement is not a valid number
     */
    private static List<Iris> loadData(String filePath, boolean labelRequired) throws IOException {
        int requiredColumns = labelRequired ? FEATURE_COUNT + 1 : FEATURE_COUNT;
        List<Iris> dataset = new ArrayList<>();
        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] values = line.split(",");
                if (values.length < requiredColumns) {
                    throw new IOException(filePath + ":" + lineNumber + ": expected at least "
                            + requiredColumns + " comma-separated values but found " + values.length);
                }
                try {
                    double sepalLength = Double.parseDouble(values[0].trim());
                    double sepalWidth = Double.parseDouble(values[1].trim());
                    double petalLength = Double.parseDouble(values[2].trim());
                    double petalWidth = Double.parseDouble(values[3].trim());
                    String species = values.length > FEATURE_COUNT
                            ? values[FEATURE_COUNT].trim()
                            : UNKNOWN_SPECIES;
                    dataset.add(new Iris(sepalLength, sepalWidth, petalLength, petalWidth, species));
                } catch (NumberFormatException e) {
                    throw new IOException(filePath + ":" + lineNumber + ": invalid number in \"" + line + "\"", e);
                }
            }
        }
        return dataset;
    }
}
```
在上述示例中,我们定义了`Iris`类来表示鸢尾花的属性和类别,并定义了`DecisionTree`类来表示决策树模型。`Node`是一个抽象类,它有两个子类,`LeafNode`表示叶节点,`InternalNode`表示内部节点。`DecisionTreeBuilder`类负责构建决策树模型。`IrisClassifier`类是程序的入口点,加载训练数据和测试数据,并使用构建的决策树对测试数据进行分类预测。
要运行此代码,你需要创建两个数据文件:`iris_training_data.txt`和`iris_test_data.txt`,分别包含用于训练和测试的鸢尾花数据。每行数据应包含四个特征值(萼片长度、萼片宽度、花瓣长度和花瓣宽度)和一个类别值(setosa、versicolor或virginica),用逗号分隔。
此示例仅仅是一个简单的鸢尾花分类器,仅供演示目的。在实际应用中,你可能需要更复杂的特征工程和更大规模的数据集来构建准确的分类模型。
|