-
Notifications
You must be signed in to change notification settings - Fork 4
/
IG_calculation.py
77 lines (62 loc) · 1.82 KB
/
IG_calculation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 5 00:36:26 2018
@author: Deepesh
"""
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 3 01:10:29 2018
@author: Deepesh
"""
import csv
import math
from collections import Counter
def calculateEntropy(data, attr):
    """Return the Shannon entropy, in bits, of column ``attr`` over ``data``.

    Args:
        data: Sized collection of rows; each row is indexed as ``row[attr]``
            and the looked-up values must be hashable.
        attr: Key of the column whose value distribution is measured.

    Returns:
        The base-2 entropy as a float. An empty ``data`` yields ``0.0``.
    """
    total = len(data)
    # Counter keeps first-seen order, so terms are accumulated in the same
    # order as a hand-written counting dict would produce.
    counts = Counter(row[attr] for row in data)

    entropy = 0.0
    for occurrences in counts.values():
        probability = occurrences / total
        entropy -= probability * math.log2(probability)
    return entropy
def calculateIG(data, attr, classLabel):
    """Return the information gain obtained by splitting ``data`` on ``attr``.

    The gain is the entropy of ``classLabel`` over all rows minus the
    size-weighted entropy of ``classLabel`` within each group of rows that
    share the same ``attr`` value.

    Args:
        data: Sized collection of rows; each row is indexed by ``attr`` and
            ``classLabel``.
        attr: Key of the column to split on.
        classLabel: Key of the column holding the class to predict.

    Returns:
        ``entropyBefore - entropyAfter`` as a float. An empty ``data``
        yields ``0.0``.
    """
    entropyBefore = calculateEntropy(data, classLabel)

    # Bucket the rows by attribute value in a single pass instead of
    # re-filtering the whole data set once per distinct value.
    partitions = {}
    for row in data:
        partitions.setdefault(row[attr], []).append(row)

    total = len(data)
    entropyAfter = 0.0
    for subset in partitions.values():
        weight = len(subset) / total
        entropyAfter += weight * calculateEntropy(subset, classLabel)

    return entropyBefore - entropyAfter
def bestAttribute(data, header, classLabel):
    """Return the column of ``header`` with the highest information gain.

    The last entry of ``header`` is used as the class column: it is skipped
    as a candidate and every gain is computed against it.

    Args:
        data: Collection of rows passed through to ``calculateIG``.
        header: Ordered column names; the final one is the class column.
        classLabel: Not read by this function; the class column is always
            ``header[-1]``. NOTE(review): confirm callers pass the same
            value before switching the implementation over to it.

    Returns:
        The name of the candidate column with the largest strictly positive
        gain (the earliest one wins a tie), or ``""`` when ``header`` is
        empty or no candidate has a positive gain.
    """
    if not header:
        return ""

    target = header[-1]
    # The running best must live outside the loop; resetting it on every
    # iteration would make the result depend only on the last candidates.
    maxIG = 0.0
    attr = ""
    for testAttr in header:
        if testAttr == target:
            continue
        IG = calculateIG(data, testAttr, target)
        if IG > maxIG:
            maxIG = IG
            attr = testAttr
    return attr
#print("Max IG = {} for attribute = {}".format(maxIG,attr))
# Load the training set into `header` (column names from the first CSV row)
# and `data` (one dict per remaining non-blank row, every cell parsed as int).
# NOTE(review): the path is a machine-specific absolute Windows path.
trainFile = "D:\\MyStudy\\UTD\\sem2\\ML\\Assignment\\assign 2\\training_set - Copy.csv"

# The context manager closes the file even if a cell fails int() parsing;
# newline="" is the mode the csv module documents for reader input.
with open(trainFile, "r", newline="") as f:
    reader = csv.reader(f)
    # An empty file leaves `header` as an empty list rather than undefined.
    header = next(reader, [])
    data = []
    for row in reader:
        # Skip rows that are blank or contain only empty cells.
        if not any(row):
            continue
        values = [int(cell) for cell in row]
        data.append(dict(zip(header, values)))