[Python]
import numpy as np

class decision_tree:
    def __init__(self, features, output, dataset):
        self.features = features
        self.output = output
        self.dataset = dataset

    def log(self, x):
        return np.log2(x)
    # you can use this function to calculate the empirical probability of a random variable under a dataset
    def get_prob(self, array):
        unique, counts = np.unique(array, return_counts=True, axis=0)
        return counts / len(array)

    # you can use this function to calculate the empirical entropy of a random variable under a dataset
    def entropy(self, array):
        p = self.get_prob(array)
        return -np.sum(p * self.log(p))
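
    # Illustrative check: a fair coin carries one bit of empirical entropy,
    # e.g. self.entropy(np.array([0, 1, 0, 1])) evaluates to 1.0.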
    def output_entropy(self):
        # calculate the empirical entropy of the output
        # you can use your code in the last assignment
        output_data = np.array([row[self.output] for row in self.dataset])
        return self.entropy(output_data)
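
    # Worked check: in the 17-row sample dataset below the output is True for 10
    # rows and False for 7, so output_entropy() should be roughly
    # -(10/17)*log2(10/17) - (7/17)*log2(7/17) ≈ 0.977 bits.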
    def conditional_entropy(self, feature):
        # calculate the empirical conditional entropy of the output relative to the "feature"
        # you can use your code in the last assignment
        feature_column = np.array([row[feature] for row in self.dataset])
        values = np.unique(feature_column)
        probs = self.get_prob(feature_column)   # aligned with "values" (np.unique sorts)
        cond_ent = 0.0
        for value, prob in zip(values, probs):
            # entropy of the output restricted to the rows where the feature takes "value"
            subset_outputs = np.array([row[self.output] for row in self.dataset
                                       if row[feature] == value])
            cond_ent += prob * self.entropy(subset_outputs)
        return cond_ent
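
    # Note: conditional_entropy implements the decomposition
    # H(Y | X) = sum over x of p(x) * H(Y | X = x),
    # i.e. the output entropy within each feature-value subset, weighted by the
    # empirical probability of that subset.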
    def feature_selection(self, features):
        # select the feature with the maximum mutual information
        # you can use your code in the last assignment
        gains = [self.output_entropy() - self.conditional_entropy(f) for f in features]
        return features[int(np.argmax(gains))]
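
    # The score used above is the information gain I(Y; X) = H(Y) - H(Y | X),
    # i.e. the mutual information between a feature and the output; this is the
    # same splitting criterion classic ID3 trees use.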
    def predict(self, data):
        # make prediction for an arbitrary data input:
        # 1) rank all features by information gain (always measured on the full
        #    training set), 2) keep only the training rows that agree with "data"
        #    on each ranked feature in turn, 3) return the majority output among
        #    the rows that are left
        ranked = []
        remaining = list(self.features)   # work on a copy so self.features is untouched
        while remaining:
            best = self.feature_selection(remaining)
            ranked.append(best)
            remaining.remove(best)
        print(ranked)
        matches = self.dataset
        for feat in ranked:
            filtered = [row for row in matches if row[feat] == data[feat]]
            if not filtered:              # no training row matches this far; stop filtering
                break
            matches = filtered
        print(matches)
        outputs = [row[self.output] for row in matches]
        return max(set(outputs), key=outputs.count)
dataset = [
    {"age": 19, "male": False, "single": False, "visit_library_in_Sunday": False},
    {"age": 19, "male": False, "single": False, "visit_library_in_Sunday": False},
    {"age": 19, "male": True, "single": False, "visit_library_in_Sunday": True},
    {"age": 19, "male": True, "single": True, "visit_library_in_Sunday": True},
    {"age": 19, "male": False, "single": False, "visit_library_in_Sunday": False},
    {"age": 20, "male": False, "single": False, "visit_library_in_Sunday": False},
    {"age": 20, "male": False, "single": False, "visit_library_in_Sunday": False},
    {"age": 20, "male": True, "single": True, "visit_library_in_Sunday": True},
    {"age": 20, "male": False, "single": True, "visit_library_in_Sunday": True},
    {"age": 20, "male": False, "single": True, "visit_library_in_Sunday": True},
    {"age": 21, "male": False, "single": True, "visit_library_in_Sunday": True},
    {"age": 21, "male": False, "single": True, "visit_library_in_Sunday": True},
    {"age": 21, "male": True, "single": False, "visit_library_in_Sunday": True},
    {"age": 21, "male": True, "single": False, "visit_library_in_Sunday": True},
    {"age": 21, "male": False, "single": False, "visit_library_in_Sunday": False},
    {"age": 21, "male": False, "single": False, "visit_library_in_Sunday": False},
    {"age": 21, "male": False, "single": False, "visit_library_in_Sunday": True}
]
my_tree = decision_tree(
    ["age", "male", "single"], "visit_library_in_Sunday", dataset)

# Test 1
print(my_tree.predict({"age": 19, "male": False, "single": False}), "should be 0 or False")
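
# A minimal sanity check (sketch): print each feature's information gain on the
# training data. From a hand count of the dataset above, "single" should show the
# largest gain, so it should appear first in the ranking that predict() prints.
for f in ["age", "male", "single"]:
    print(f, my_tree.output_entropy() - my_tree.conditional_entropy(f))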