Write a program in Java/C to perform supervised pattern classification using decision trees
ID: 3672201 • Letter: W
Question
Write a program in JAVA/ C to perform supervised pattern classification using decision trees. Your program should have two components: training and application. In training mode, a data set with labeled samples will be presented to the program, and it will use those samples to generate a decision tree. In application mode, the program will use the decision tree generated by the training mode and use it to classify samples with unknown class assignment. You have to implement these two modes in one program.
The data set is in ARFF format, with one sample / pattern vector per line of input. Each line will contain a series of attribute values, separated by commas or spaces. The file has an ARFF header consisting of a set of tags beginning with the ‘@’ character. These tags provide descriptions of the attributes (name and type). The data begins after the @data tag, which denotes the end of the header.
Below is the ARFF file — 'bcwdisc.arff'. (I pasted the contents of the file here because I can't upload the file, but the program should read from the actual ARFF file.)
@relation breast_cancer
@attribute Clump_Thickness {0, 1, 2}
@attribute Uniformity_of_Cell_Size {0, 1, 2}
@attribute Uniformity_of_Cell_Shape {0, 1, 2}
@attribute Marginal_Adhesion {0, 1, 2}
@attribute Single_Epithelial_Cell_Size {0, 1, 2}
@attribute Bare_Nuclei {0, 1, 2}
@attribute Bland_Chromatin {0, 1, 2}
@attribute Normal_Nucleoli {0, 1, 2}
@attribute Mitoses {0, 1, 2}
@attribute class {2, 4}
@data
1 0 0 0 0 0 1 0 0 2
1 2 2 2 2 2 1 1 0 2
0 0 0 0 0 1 1 0 0 2
2 2 2 0 1 2 1 2 0 2
1 0 0 1 0 0 1 0 0 2
2 2 2 2 2 2 2 2 0 4
0 0 0 0 0 2 1 0 0 2
0 0 1 0 0 0 1 0 0 2
0 0 0 0 0 0 0 0 2 2
1 1 0 0 0 0 0 0 0 2
0 0 0 0 0 0 1 0 0 2
0 0 0 0 0 0 0 0 0 2
1 1 1 1 0 1 2 2 0 4
0 0 0 0 0 1 1 0 0 2
2 2 2 2 2 2 2 2 2 4
2 2 2 2 2 0 2 2 0 4
1 0 0 0 0 0 0 0 0 2
1 0 0 0 0 0 1 0 0 2
2 2 2 2 2 2 2 0 2 4
Explanation / Answer
import weka.classifiers.*; import weka.core.*; import java.io.*; import java.util.*; /** * Class implementing an Id3 decision tree classifier. */ public class Id3 extends DistributionClassifier { /** The node's successors. */ private Id3[] m_Successors; /** Attribute used for splitting. */ private Attribute m_Attribute; /** Class value if node is leaf. */ private double m_ClassValue; /** Class distribution if node is leaf. */ private double[] m_Distribution; /** Class attribute of dataset. */ private Attribute m_ClassAttribute; /** * Builds Id3 decision tree classifier. */ public void buildClassifier(Instances data) throws Exception { if (!data.classAttribute().isNominal()) { throw new Exception(“Id3: nominal class, please.”); } Enumeration enumAtt = data.enumerateAttributes(); while (enumAtt.hasMoreElements()) { Attribute attr = (Attribute) enumAtt.nextElement(); if (!attr.isNominal()) { throw new Exception(“Id3: only nominal attributes, please.”); } Enumeration enum = data.enumerateInstances(); while (enum.hasMoreElements()) { if (((Instance) enum.nextElement()).isMissing(attr)) { throw new Exception(“Id3: no missing values, please.”); } } } data = new Instances(data); data.deleteWithMissingClass(); makeTree(data); } /** * Method building Id3 tree. */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node.if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. 
if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new Id3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new Id3(); m_Successors[j].buildClassifier(splitData[j]); } } } /** * Classifies a given test instance using the decision tree. */ public double classifyInstance(Instance instance) { if (m_Attribute == null) { return m_ClassValue; } else { return m_Successors[(int) instance.value(m_Attribute)]. classifyInstance(instance); } } /** * Computes class distribution for instance using decision tree. */ public double[] distributionForInstance(Instance instance) {if (m_Attribute == null) { return m_Distribution; } else { return m_Successors[(int) instance.value(m_Attribute)]. distributionForInstance(instance); } } /** * Prints the decision tree using the private toString method from below. */ public String toString() { return “Id3 classifier ============== “ + toString(0); } /** * Computes information gain for an attribute. */ private double computeInfoGain(Instances data, Attribute att) throws Exception { double infoGain = computeEntropy(data); Instances[] splitData = splitData(data, att); for (int j = 0; j < att.numValues(); j++) { if (splitData[j].numInstances() > 0) { infoGain -= ((double) splitData[j].numInstances() / (double) data.numInstances()) * computeEntropy(splitData[j]); } } return infoGain; } /** * Computes the entropy of a dataset. 
*/ private double computeEntropy(Instances data) throws Exception { double [] classCounts = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; } double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) { entropy -= classCounts[j] * Utils.log2(classCounts[j]); } } entropy /= (double) data.numInstances(); return entropy + Utils.log2(data.numInstances()); } /** * Splits a dataset according to the values of a nominal attribute.*/ private Instances[] splitData(Instances data, Attribute att) { Instances[] splitData = new Instances[att.numValues()]; for (int j = 0; j < att.numValues(); j++) { splitData[j] = new Instances(data, data.numInstances()); } Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); splitData[(int) inst.value(att)].add(inst); } return splitData; } /** * Outputs a tree at a certain level. */ private String toString(int level) { StringBuffer text = new StringBuffer(); if (m_Attribute == null) { if (Instance.isMissingValue(m_ClassValue)) { text.append(“: null”); } else { text.append(“: “+m_ClassAttribute.value((int) m_ClassValue)); } } else { for (int j = 0; j < m_Attribute.numValues(); j++) { text.append(“ ”); for (int i = 0; i < level; i++) { text.append(“| “); } text.append(m_Attribute.name() + “ = “ + m_Attribute.value(j)); text.append(m_Successors[j].toString(level + 1)); } } return text.toString(); } /** * Main method. */ public static void main(String[] args) { try { System.out.println(Evaluation.evaluateModel(new Id3(), args)); } catch (Exception e) { System.out.println(e.getMessage()); } } }
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.