new Instance(instance)
+ *
+ * @return the shallow copy
+ */
+ //@ also ensures \result != null;
+ //@ also ensures \result instanceof Instance;
+ //@ also ensures ((Instance)\result).m_Dataset == m_Dataset;
+ public /*@pure@*/ Object copy() {
+
+ Instance result = new Instance(this); // construct the copy from this instance
+ result.m_Dataset = m_Dataset; // the copy keeps the same dataset reference as this instance
+ return result;
+ }
+
public double GetElementAt(int index){ // reads one entry of m_AttValues by position
return m_AttValues[index]; // no range check is done here; an invalid index fails at the array access
}
diff --git a/src/gaknn/core/Instances.java b/src/gaknn/core/Instances.java
index 83f4752..60f3152 100644
--- a/src/gaknn/core/Instances.java
+++ b/src/gaknn/core/Instances.java
@@ -1,5 +1,11 @@
package gaknn.core;
+
+
+
+
+//import Instance;
+
/**
* Instances class represents the data.
*
@@ -39,6 +45,9 @@ public Instances(String name,
m_RelationName = name;
m_ClassIndex = -1;
m_Attributes = attInfo;
+
+ //initialize the m_instances
+ m_Instances=new FastVector();
for (int i=0; i
+ * @misc{missing_id,
+ * author = {Wikipedia},
+ * title = {Euclidean distance},
+ * URL = {http://en.wikipedia.org/wiki/Euclidean_distance}
+ * }
+ *
+ *
+
+ *
+
+ * Valid options are:
+ *
+ * -D + * Turns off the normalization of attribute + * values in distance calculation.+ * + *
-R <col1,col2-col4,...> + * Specifies list of columns to used in the calculation of the + * distance. 'first' and 'last' are valid indices. + * (default: first-last)+ * + *
-V + * Invert matching sense of column indices.+ * + + * + * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) + * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz) + * @author FracPete (fracpete at waikato dot ac dot nz) + * @version $Revision: 1.13 $ + */ +public class EuclideanDistance + extends NormalizableDistance + implements Cloneable { + + /** for serialization. */ + private static final long serialVersionUID = 1068606253458807903L; + + /** + * Constructs an Euclidean Distance object, Instances must be still set. + */ + public EuclideanDistance() { + super(); + } + + /** + * Constructs an Euclidean Distance object and automatically initializes the + * ranges. + * + * @param data the instances the distance function should work on + */ + public EuclideanDistance(Instances data,double[] weights) { + super(data); + m_Weights=weights; + } + + /** + * Returns a string describing this object. + * + * @return a description of the evaluator suitable for + * displaying in the explorer/experimenter gui + */ +// public String globalInfo() { +// return +// "Implementing Euclidean distance (or similarity) function.\n\n" +// + "One object defines not one distance but the data model in which " +// + "the distances between objects of that data model can be computed.\n\n" +// + "Attention: For efficiency reasons the use of consistency checks " +// + "(like are the data models of the two instances exactly the same), " +// + "is low.\n\n" +// + "For more information, see:\n\n" +// + getTechnicalInformation().toString(); +// } +// +// /** +// * Returns an instance of a TechnicalInformation object, containing +// * detailed information about the technical background of this class, +// * e.g., paper reference or book this class is based on. 
+// * +// * @return the technical information about this class +// */ +// public TechnicalInformation getTechnicalInformation() { +// TechnicalInformation result; +// +// result = new TechnicalInformation(Type.MISC); +// result.setValue(Field.AUTHOR, "Wikipedia"); +// result.setValue(Field.TITLE, "Euclidean distance"); +// result.setValue(Field.URL, "http://en.wikipedia.org/wiki/Euclidean_distance"); +// +// return result; +// } + + /** + * Calculates the distance between two instances. + * + * @param first the first instance + * @param second the second instance + * @return the distance between the two given instances + */ + public double distance(Instance first, Instance second) { + return Math.sqrt(distance(first, second, Double.POSITIVE_INFINITY)); + } + + /** + * Calculates the distance (or similarity) between two instances. Need to + * pass this returned distance later on to postprocess method to set it on + * correct scale.
+ * @article{Friedman1977,
+ * author = {Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel},
+ * journal = {ACM Transactions on Mathematics Software},
+ * month = {September},
+ * number = {3},
+ * pages = {209-226},
+ * title = {An Algorithm for Finding Best Matches in Logarithmic Expected Time},
+ * volume = {3},
+ * year = {1977}
+ * }
+ *
+ * @techreport{Moore1991,
+ * author = {Andrew Moore},
+ * booktitle = {University of Cambridge Computer Laboratory Technical Report No. 209},
+ * howpublished = {Extract from PhD Thesis},
+ * title = {A tutorial on kd-trees},
+ * year = {1991},
+ * HTTP = {Available from http://www.autonlab.org/autonweb/14665.html}
+ * }
+ *
+ *
+
+ *
+
+ * Valid options are:
+ *
+ * -S <classname and options> + * Node splitting method to use. + * (default: weka.core.neighboursearch.kdtrees.SlidingMidPointOfWidestSide)+ * + *
-W <value> + * Set minimal width of a box + * (default: 1.0E-2).+ * + *
-L + * Maximal number of instances in a leaf + * (default: 40).+ * + *
-N + * Normalizing will be done + * (Select dimension for split, with normalising to universe).+ * + + * + * @author Gabi Schmidberger (gabi[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) + * @author Malcolm Ware (mfw4[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) + * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) + * @version $Revision: 1.3 $ + */ +public class KDTree + extends NearestNeighbourSearch + { + + /** For serialization. */ + private static final long serialVersionUID = 1505717283763272533L; + + /** + * Array holding the distances of the nearest neighbours. It is filled up both + * by nearestNeighbour() and kNearestNeighbours(). + */ + protected double[] m_DistanceList; + + /** + * Indexlist of the instances of this kdtree. Instances get sorted according + * to the splits. the nodes of the KDTree just hold their start and end + * indices + */ + protected int[] m_InstList; + + /** The root node of the tree. */ + protected KDTreeNode m_Root; + + /** The node splitter. */ + protected KDTreeNodeSplitter m_Splitter = new SlidingMidPointOfWidestSide(); + + /** Tree stats. */ + protected int m_NumNodes, m_NumLeaves, m_MaxDepth; + + /** Tree Stats variables. */ + protected TreePerformanceStats m_TreeStats = null; + + // Constants + /** The index of MIN value in attributes' range array. */ + public static final int MIN = EuclideanDistance.R_MIN; + + /** The index of MAX value in attributes' range array. */ + public static final int MAX = EuclideanDistance.R_MAX; + + /** The index of WIDTH (MAX-MIN) value in attributes' range array. */ + public static final int WIDTH = EuclideanDistance.R_WIDTH; + + /** keep the weights. */ + //@author thimal + protected static double[] m_Weights; + + + /** + * Creates a new instance of KDTree. + */ + public KDTree() { + super(); + if (getMeasurePerformance()) + m_Stats = m_TreeStats = new TreePerformanceStats(); + } + + /** + * Creates a new instance of KDTree. 
+ * It also builds the tree on supplied set of Instances. + * @param insts The instances/points on which the BallTree + * should be built on. + */ + public KDTree(Instances insts) { + super(insts); + if (getMeasurePerformance()) + m_Stats = m_TreeStats = new TreePerformanceStats(); + } + + /** + * Builds the KDTree on the supplied set of instances/points. It + * is adviseable to run the replace missing attributes filter + * on the passed instances first. + * NOTE: This method should not be called from outside this + * class. Outside classes should call setInstances(Instances) + * instead. + * + * @param instances The instances to build the tree on + * @throws Exception if something goes wrong + */ + protected void buildKDTree(Instances instances) throws Exception { + + checkMissing(instances); + if (m_EuclideanDistance == null) + m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance( + instances,m_Weights); + else + m_EuclideanDistance.setInstances(instances); + + m_Instances = instances; + int numInst = m_Instances.Size(); + + // Make the global index list + m_InstList = new int[numInst]; + + for (int i = 0; i < numInst; i++) { + m_InstList[i] = i; + } + + double[][] universe = m_EuclideanDistance.getRanges(); + + // initializing internal fields of KDTreeSplitter + m_Splitter.setInstances(m_Instances); + m_Splitter.setInstanceList(m_InstList); + m_Splitter.setEuclideanDistanceFunction(m_EuclideanDistance); + m_Splitter.setNodeWidthNormalization(m_NormalizeNodeWidth); + + // building tree + m_NumNodes = m_NumLeaves = 1; + m_MaxDepth = 0; + m_Root = new KDTreeNode(m_NumNodes, 0, m_Instances.Size() - 1, + universe); + + splitNodes(m_Root, universe, m_MaxDepth + 1); + } + + /** + * Recursively splits nodes of a tree starting from the supplied node. 
+ * The splitting stops for any node for which the number of instances/points + * falls below a given threshold (given by m_MaxInstInLeaf), or if the + * maximum relative width/range of the instances/points + * (i.e. max_i(max(att_i) - min(att_i)) ) falls below a given threshold + * (given by m_MinBoxRelWidth). + * + * @param node The node to start splitting from. + * @param universe The attribute ranges of the whole dataset. + * @param depth The depth of the supplied node. + * @throws Exception If there is some problem + * splitting. + */ + protected void splitNodes(KDTreeNode node, double[][] universe, + int depth) throws Exception { + double[][] nodeRanges = m_EuclideanDistance.initializeRanges(m_InstList, + node.m_Start, node.m_End); + if (node.numInstances() <= m_MaxInstInLeaf + || getMaxRelativeNodeWidth(nodeRanges, universe) <= m_MinBoxRelWidth) + return; + + // splitting a node so it is no longer a leaf + m_NumLeaves--; + + if (depth > m_MaxDepth) + m_MaxDepth = depth; + + m_Splitter.splitNode(node, m_NumNodes, nodeRanges, universe); + m_NumNodes += 2; + m_NumLeaves += 2; + + splitNodes(node.m_Left, universe, depth + 1); + splitNodes(node.m_Right, universe, depth + 1); + } + + /** + * Returns (in the supplied heap object) the k nearest + * neighbours of the given instance starting from the give + * tree node. >k neighbours are returned if there are more than + * one neighbours at the kth boundary. NOTE: This method should + * not be used from outside this class. Outside classes should + * call kNearestNeighbours(Instance, int). + * + * @param target The instance to find the nearest neighbours for. + * @param node The KDTreeNode to start the search from. + * @param k The number of neighbours to find. + * @param heap The MyHeap object to store/update the kNNs found + * during the search. + * @param distanceToParents The distance of the supplied target + * to the parents of the supplied tree node. 
+ * @throws Exception if the nearest neighbour could not be found. + */ + protected void findNearestNeighbours(Instance target, KDTreeNode node, int k, + MyHeap heap, double distanceToParents) throws Exception { + if (node.isALeaf()) { + if (m_TreeStats != null) { + m_TreeStats.updatePointCount(node.numInstances()); + m_TreeStats.incrLeafCount(); + } + double distance; + // look at all the instances in this leaf + for (int idx = node.m_Start; idx <= node.m_End; idx++) { + if (target == m_Instances.instance(m_InstList[idx])) // for + // hold-one-out + // cross-validation + continue; + if (heap.size() < k) { + distance = m_EuclideanDistance.distance(target, m_Instances + .instance(m_InstList[idx]), Double.POSITIVE_INFINITY, m_Stats); + heap.put(m_InstList[idx], distance); + } else { + MyHeapElement temp = heap.peek(); + distance = m_EuclideanDistance.distance(target, m_Instances + .instance(m_InstList[idx]), temp.distance, m_Stats); + if (distance < temp.distance) { + heap.putBySubstitute(m_InstList[idx], distance); + } else if (distance == temp.distance) { + heap.putKthNearest(m_InstList[idx], distance); + } + }// end else heap.size==k + }// end for + + } else { + if (m_TreeStats != null) { + m_TreeStats.incrIntNodeCount(); + } + KDTreeNode nearer, further; + boolean targetInLeft = m_EuclideanDistance.valueIsSmallerEqual(target, + node.m_SplitDim, node.m_SplitValue); + + if (targetInLeft) { + nearer = node.m_Left; + further = node.m_Right; + } else { + nearer = node.m_Right; + further = node.m_Left; + } + findNearestNeighbours(target, nearer, k, heap, distanceToParents); + + // ... 
now look in further half if maxDist reaches into it + if (heap.size() < k) { // if haven't found the first k + double distanceToSplitPlane = distanceToParents + + m_EuclideanDistance.sqDifference(node.m_SplitDim, target + .GetElementAt(node.m_SplitDim), node.m_SplitValue); + findNearestNeighbours(target, further, k, heap, distanceToSplitPlane); + return; + } else { // else see if ball centered at query intersects with the other + // side. + double distanceToSplitPlane = distanceToParents + + m_EuclideanDistance.sqDifference(node.m_SplitDim, target + .GetElementAt(node.m_SplitDim), node.m_SplitValue); + if (heap.peek().distance >= distanceToSplitPlane) { + findNearestNeighbours(target, further, k, heap, distanceToSplitPlane); + } + }// end else + }// end else_if an internal node + } + + /** + * Returns the k nearest neighbours of the supplied instance. + * >k neighbours are returned if there are more than one + * neighbours at the kth boundary. + * + * @param target The instance to find the nearest neighbours for. + * @param k The number of neighbours to find. + * @return The k nearest neighbours (or >k if more there are than + * one neighbours at the kth boundary). + * @throws Exception if the nearest neighbour could not be found. 
+ */ + public Instances kNearestNeighbours(Instance target, int k) throws Exception { + checkMissing(target); + + if (m_Stats != null) + m_Stats.searchStart(); + + MyHeap heap = new MyHeap(k); + findNearestNeighbours(target, m_Root, k, heap, 0.0); + + if (m_Stats != null) + m_Stats.searchFinish(); + + Instances neighbours = new Instances(m_Instances, (heap.size() + heap.noOfKthNearest())); + m_DistanceList = new double[heap.size() + heap.noOfKthNearest()]; + int[] indices = new int[heap.size() + heap.noOfKthNearest()]; + int i = indices.length - 1; + MyHeapElement h; + while (heap.noOfKthNearest() > 0) { + h = heap.getKthNearest(); + indices[i] = h.index; + m_DistanceList[i] = h.distance; + i--; + } + while (heap.size() > 0) { + h = heap.get(); + indices[i] = h.index; + m_DistanceList[i] = h.distance; + i--; + } + m_DistanceFunction.postProcessDistances(m_DistanceList); + for (int idx = 0; idx < indices.length; idx++) { + neighbours.add(m_Instances.instance(indices[idx])); + } + + return neighbours; + } + + + /** + * Returns the nearest neighbour of the supplied target + * instance. + * + * @param target The instance to find the nearest neighbour for. + * @return The nearest neighbour from among the previously + * supplied training instances. + * @throws Exception if the neighbours could not be found. + */ + public Instance nearestNeighbour(Instance target) throws Exception { + return (kNearestNeighbours(target, 1)).instance(0); + } + + /** + * Returns the distances to the kNearest or 1 nearest neighbour currently + * found with either the kNearestNeighbours or the nearestNeighbour method. + * + * @return array containing the distances of the + * nearestNeighbours. The length and ordering of the array + * is the same as that of the instances returned by + * nearestNeighbour functions. + * @throws Exception if called before calling kNearestNeighbours or + * nearestNeighbours. 
+ */ + public double[] getDistances() throws Exception { + if (m_Instances == null || m_DistanceList == null) + throw new Exception("The tree has not been supplied with a set of " + + "instances or getDistances() has been called " + + "before calling kNearestNeighbours()."); + return m_DistanceList; + } + + + /** + * Builds the KDTree on the given set of instances. + * @param instances The insts on which the KDTree is to be + * built. + * @throws Exception If some error occurs while + * building the KDTree + */ + public void setInstances(Instances instances) throws Exception { + super.setInstances(instances); + buildKDTree(instances); + } + + + /** + * Adds one instance to the KDTree. This updates the KDTree structure to take + * into account the newly added training instance. + * + * @param instance the instance to be added. Usually the newly added instance in the + * training set. + * @throws Exception If the instance cannot be added. + */ + public void update(Instance instance) throws Exception { // better to change + // to addInstance + if (m_Instances == null) + throw new Exception("No instances supplied yet. Have to call " + + "setInstances(instances) with a set of Instances " + "first."); + + addInstanceInfo(instance); + addInstanceToTree(instance, m_Root); + } + + /** + * Recursively adds an instance to the tree starting from + * the supplied KDTreeNode. + * NOTE: This should not be called by outside classes, + * outside classes should instead call update(Instance) + * method. + * + * @param inst The instance to add to the tree + * @param node The node to start the recursive search + * from, for the leaf node where the supplied instance + * would go. + * @throws Exception If some error occurs while adding + * the instance. 
+ */ + protected void addInstanceToTree(Instance inst, KDTreeNode node) + throws Exception { + if (node.isALeaf()) { + int instList[] = new int[m_Instances.Size()]; + try { + System.arraycopy(m_InstList, 0, instList, 0, node.m_End + 1); // m_InstList.squeezeIn(m_End, + // index); + if (node.m_End < m_InstList.length - 1) + System.arraycopy(m_InstList, node.m_End + 1, instList, + node.m_End + 2, m_InstList.length - node.m_End - 1); + instList[node.m_End + 1] = m_Instances.Size() - 1; + } catch (ArrayIndexOutOfBoundsException ex) { + System.err.println("m_InstList.length: " + m_InstList.length + + " instList.length: " + instList.length + "node.m_End+1: " + + (node.m_End + 1) + "m_InstList.length-node.m_End+1: " + + (m_InstList.length - node.m_End - 1)); + throw ex; + } + m_InstList = instList; + + node.m_End++; + node.m_NodeRanges = m_EuclideanDistance.updateRanges(inst, + node.m_NodeRanges); + + m_Splitter.setInstanceList(m_InstList); + + // split this leaf node if necessary + double[][] universe = m_EuclideanDistance.getRanges(); + if (node.numInstances() > m_MaxInstInLeaf + && getMaxRelativeNodeWidth(node.m_NodeRanges, universe) > m_MinBoxRelWidth) { + m_Splitter.splitNode(node, m_NumNodes, node.m_NodeRanges, universe); + m_NumNodes += 2; + } + }// end if node is a leaf + else { + if (m_EuclideanDistance.valueIsSmallerEqual(inst, node.m_SplitDim, + node.m_SplitValue)) { + addInstanceToTree(inst, node.m_Left); + afterAddInstance(node.m_Right); + } else + addInstanceToTree(inst, node.m_Right); + + node.m_End++; + node.m_NodeRanges = m_EuclideanDistance.updateRanges(inst, + node.m_NodeRanges); + } + } + + /** + * Corrects the start and end indices of a + * KDTreeNode after an instance is added to + * the tree. The start and end indices for + * the master index array (m_InstList) + * stored in the nodes need to be updated + * for all nodes in the subtree on the + * right of a node where the instance + * was added. 
+ * NOTE: No outside class should call this + * method. + * + * @param node KDTreeNode whose start and end indices + * need to be updated. + */ + protected void afterAddInstance(KDTreeNode node) { + node.m_Start++; + node.m_End++; + if (!node.isALeaf()) { + afterAddInstance(node.m_Left); + afterAddInstance(node.m_Right); + } + } + + /** + * Adds one instance to KDTree loosly. It only changes the ranges in + * EuclideanDistance, and does not affect the structure of the KDTree. + * + * @param instance the new instance. Usually this is the test instance + * supplied to update the range of attributes in the distance function. + */ + public void addInstanceInfo(Instance instance) { + m_EuclideanDistance.updateRanges(instance); + } + + /** + * Checks if there is any instance with missing values. Throws an exception if + * there is, as KDTree does not handle missing values. + * + * @param instances the instances to check + * @throws Exception if missing values are encountered + */ + protected void checkMissing(Instances instances) throws Exception { + for (int i = 0; i < instances.Size(); i++) { + Instance ins = instances.instance(i); + for (int j = 0; j < ins.numValues(); j++) { + if (ins.index(j) != ins.GetClassIndex()) + if (ins.isMissing(j)) { + throw new Exception("ERROR: KDTree can not deal with missing " + + "values. Please run ReplaceMissingValues filter " + + "on the dataset before passing it on to the KDTree."); + } + } + } + } + + /** + * Checks if there is any missing value in the given + * instance. + * @param ins The instance to check missing values in. + * @throws Exception If there is a missing value in the + * instance. + */ + protected void checkMissing(Instance ins) throws Exception { + for (int j = 0; j < ins.numValues(); j++) { + if (ins.index(j) != ins.GetClassIndex()) + if (ins.isMissing(j)) { + throw new Exception("ERROR: KDTree can not deal with missing " + + "values. 
Please run ReplaceMissingValues filter " + + "on the dataset before passing it on to the KDTree."); + } + } + } + + /** + * Returns the maximum attribute width of instances/points + * in a KDTreeNode relative to the whole dataset. + * + * @param nodeRanges The attribute ranges of the + * KDTreeNode whose maximum relative width is to be + * determined. + * @param universe The attribute ranges of the whole + * dataset (training instances + test instances so + * far encountered). + * @return The maximum relative width + */ + protected double getMaxRelativeNodeWidth(double[][] nodeRanges, + double[][] universe) { + int widest = widestDim(nodeRanges, universe); + if(widest < 0) + return 0.0; + else + return nodeRanges[widest][WIDTH] / universe[widest][WIDTH]; + } + + /** + * Returns the widest dimension/attribute in a + * KDTreeNode (widest after normalizing). + * @param nodeRanges The attribute ranges of + * the KDTreeNode. + * @param universe The attribute ranges of the + * whole dataset (training instances + test + * instances so far encountered). + * @return The index of the widest + * dimension/attribute. + */ + protected int widestDim(double[][] nodeRanges, double[][] universe) { + final int classIdx = m_Instances.GetClassIndex(); + double widest = 0.0; + int w = -1; + if (m_NormalizeNodeWidth) { + for (int i = 0; i < nodeRanges.length; i++) { + double newWidest = nodeRanges[i][WIDTH] / universe[i][WIDTH]; + if (newWidest > widest) { + if (i == classIdx) + continue; + widest = newWidest; + w = i; + } + } + } else { + for (int i = 0; i < nodeRanges.length; i++) { + if (nodeRanges[i][WIDTH] > widest) { + if (i == classIdx) + continue; + widest = nodeRanges[i][WIDTH]; + w = i; + } + } + } + return w; + } + + /** + * Returns the size of the tree. + * + * @return the size of the tree + */ + public double measureTreeSize() { + return m_NumNodes; + } + + /** + * Returns the number of leaves. 
+ * + * @return the number of leaves + */ + public double measureNumLeaves() { + return m_NumLeaves; + } + + /** + * Returns the depth of the tree. + * + * @return The depth of the tree + */ + public double measureMaxDepth() { + return m_MaxDepth; + } + + /** + * Returns an enumeration of the additional measure names. + * + * @return an enumeration of the measure names + */ + public Enumeration enumerateMeasures() { + Vector newVector = new Vector(); + newVector.addElement("measureTreeSize"); + newVector.addElement("measureNumLeaves"); + newVector.addElement("measureMaxDepth"); + if (m_Stats != null) { + for (Enumeration e = m_Stats.enumerateMeasures(); e.hasMoreElements();) { + newVector.addElement(e.nextElement()); + } + } + return newVector.elements(); + } + + /** + * Returns the value of the named measure. + * + * @param additionalMeasureName the name of + * the measure to query for its value. + * @return The value of the named measure + * @throws IllegalArgumentException If the named measure + * is not supported. + */ + public double getMeasure(String additionalMeasureName) { + if (additionalMeasureName.compareToIgnoreCase("measureMaxDepth") == 0) { + return measureMaxDepth(); + } else if (additionalMeasureName.compareToIgnoreCase("measureTreeSize") == 0) { + return measureTreeSize(); + } else if (additionalMeasureName.compareToIgnoreCase("measureNumLeaves") == 0) { + return measureNumLeaves(); + } else if (m_Stats != null) { + return m_Stats.getMeasure(additionalMeasureName); + } else { + throw new IllegalArgumentException(additionalMeasureName + + " not supported (KDTree)"); + } + } + + /** + * Sets whether to calculate the performance statistics or not. + * @param measurePerformance Should be true if performance + * statistics are to be measured. 
+ */ + public void setMeasurePerformance(boolean measurePerformance) { + m_MeasurePerformance = measurePerformance; + if (m_MeasurePerformance) { + if (m_Stats == null) + m_Stats = m_TreeStats = new TreePerformanceStats(); + } else + m_Stats = m_TreeStats = null; + } + + /** + * Assigns instances to centers using KDTree. + * + * @param centers the current centers + * @param assignments the centerindex for each instance + * @param pc the threshold value for pruning. + * @throws Exception If there is some problem + * assigning instances to centers. + */ + public void centerInstances(Instances centers, int[] assignments, double pc) + throws Exception { + + int[] centList = new int[centers.Size()]; + for (int i = 0; i < centers.Size(); i++) + centList[i] = i; + + determineAssignments(m_Root, centers, centList, assignments, pc); + } + + /** + * Assigns instances to the current centers called candidates. + * + * @param node The node to start assigning the instances from. + * @param centers all the current centers. + * @param candidates the current centers the method works on. + * @param assignments the center index for each instance. + * @param pc the threshold value for pruning. + * @throws Exception If there is some problem assigning + * instances to centers. 
+ */ + protected void determineAssignments(KDTreeNode node, Instances centers, + int[] candidates, int[] assignments, double pc) throws Exception { + + // reduce number of owners for current hyper rectangle + int[] owners = refineOwners(node, centers, candidates); + + // only one owner + if (owners.length == 1) { + // all instances of this node are owned by one center + for (int i = node.m_Start; i <= node.m_End; i++) { + assignments[m_InstList[i]] // the assignment of this instance + = owners[0]; // is the current owner + } + } else if (!node.isALeaf()) { + // more than one owner and it is not a leaf + determineAssignments(node.m_Left, centers, owners, assignments, pc); + determineAssignments(node.m_Right, centers, owners, assignments, pc); + } else { + // this is a leaf and there are more than 1 owner + // XMeans. + assignSubToCenters(node, centers, owners, assignments); + } + } + + /** + * Refines the ownerlist. + * + * @param node The current tree node. + * @param centers all centers + * @param candidates the indexes of those centers that are candidates. + * @return list of owners + * @throws Exception If some problem occurs in refining. 
+ */ + protected int[] refineOwners(KDTreeNode node, Instances centers, + int[] candidates) throws Exception { + + int[] owners = new int[candidates.length]; + double minDistance = Double.POSITIVE_INFINITY; + int ownerIndex = -1; + Instance owner; + int numCand = candidates.length; + double[] distance = new double[numCand]; + boolean[] inside = new boolean[numCand]; + for (int i = 0; i < numCand; i++) { + distance[i] = distanceToHrect(node, centers.instance(candidates[i])); + inside[i] = (distance[i] == 0.0); + if (distance[i] < minDistance) { + minDistance = distance[i]; + ownerIndex = i; + } + } + owner = new Instance(centers.instance(candidates[ownerIndex])); + + // are there other owners + // loop also goes over already found owner, keeps order + // in owner list + int index = 0; + for (int i = 0; i < numCand; i++) { + // 1. all centers that are points within rectangle are owners + if ((inside[i]) + + // 2. take all points with same distance to the rect. as the owner + || (distance[i] == distance[ownerIndex])) { + + // add competitor to owners list + owners[index++] = candidates[i]; + } else { + + Instance competitor = new Instance(centers.instance(candidates[i])); + if + + // 3. point has larger distance to rectangle but still can compete + // with owner for some points in the rectangle + (!candidateIsFullOwner(node, owner, competitor)) + + { + // also add competitor to owners list + owners[index++] = candidates[i]; + } + } + } + int[] result = new int[index]; + for (int i = 0; i < index; i++) + result[i] = owners[i]; + return result; + } + + /** + * Returns the distance between a point and an hyperrectangle. + * + * @param node The current node from whose hyperrectangle + * the distance is to be measured. + * @param x the point + * @return the distance + * @throws Exception If some problem occurs in determining + * the distance to the hyperrectangle. 
+ */ + protected double distanceToHrect(KDTreeNode node, Instance x) throws Exception { + double distance = 0.0; + + Instance closestPoint = new Instance(x); + boolean inside; + inside = clipToInsideHrect(node, closestPoint); + if (!inside) + distance = m_EuclideanDistance.distance(closestPoint, x); + return distance; + } + + /** + * Finds the closest point in the hyper rectangle to a given point. Change the + * given point to this closest point by clipping of at all the dimensions to + * be clipped of. If the point is inside the rectangle it stays unchanged. The + * return value is true if the point was not changed, so the the return value + * is true if the point was inside the rectangle. + * + * @param node The current KDTreeNode in whose hyperrectangle the closest + * point is to be found. + * @param x a point + * @return true if the input point stayed unchanged. + */ + protected boolean clipToInsideHrect(KDTreeNode node, Instance x) { + boolean inside = true; + for (int i = 0; i < m_Instances.NumAttributes(); i++) { + // TODO treat nominals differently!?? + if (x.GetElementAt(i) < node.m_NodeRanges[i][MIN]) { + x.setValue(i, node.m_NodeRanges[i][MIN]); + inside = false; + } else if (x.GetElementAt(i) > node.m_NodeRanges[i][MAX]) { + x.setValue(i, node.m_NodeRanges[i][MAX]); + inside = false; + } + } + return inside; + } + + /** + * Returns true if candidate is a full owner in respect to a competitor. + *
+ * + * The candidate has been the closer point to the current rectangle or even + * has been a point within the rectangle. The competitor is competing with the + * candidate for a few points out of the rectangle although it is a point + * further away from the rectangle than the candidate. The extreme point is the + * corner of the rectangle that is furthest away from the candidate towards + * the direction of the competitor. + * + * If the distance from the candidate to this extreme point is smaller than the + * distance from the competitor to this extreme point, then it is proven that none of + * the points in the rectangle can be owned by the competitor and the + * candidate is full owner of the rectangle in respect to this competitor. See + * also D. Pelleg and A. Moore's paper 'Accelerating exact k-means Algorithms + * with Geometric Reasoning'. + *
+ *
+ * @param node The current KDTreeNode / hyperrectangle.
+ * @param candidate instance that is candidate to be owner
+ * @param competitor instance that competes against the candidate
+ * @return true if candidate is full owner
+ * @throws Exception If some problem occurs.
+ */
+ protected boolean candidateIsFullOwner(KDTreeNode node, Instance candidate,
+     Instance competitor) throws Exception {
+   // Start from a copy of the candidate and move every attribute onto one of
+   // the node's range bounds: MAX where the competitor's value minus the
+   // candidate's value is positive, MIN in every other case.
+   final Instance corner = new Instance(candidate);
+   for (int dim = 0; dim < m_Instances.NumAttributes(); dim++) {
+     final double offset = competitor.GetElementAt(dim) - candidate.GetElementAt(dim);
+     final int bound = (offset > 0) ? MAX : MIN;
+     corner.setValue(dim, node.m_NodeRanges[dim][bound]);
+   }
+
+   // The candidate is the full owner only when it is strictly closer to that
+   // corner than the competitor is.
+   final double candidateDistance = m_EuclideanDistance.distance(corner, candidate);
+   final double competitorDistance = m_EuclideanDistance.distance(corner, competitor);
+   return candidateDistance < competitorDistance;
+ }
+
+ /**
+  * Assigns each instance covered by the given node to a center. For every
+  * position between the node's start and end index, the instance is looked up
+  * through the master index list and the value returned by the distance
+  * function's closestPoint is stored as its assignment.
+  *
+  * @param node The KDTreeNode whose instances are to be assigned.
+  * @param centers all the input centers
+  * @param centList the list of centers to work with
+  * @param assignments index list of last assignments; written in place,
+  *          indexed by instance index
+  * @throws Exception If there is error assigning the instances.
+  */
+ public void assignSubToCenters(KDTreeNode node, Instances centers,
+     int[] centList, int[] assignments) throws Exception {
+   // todo: undecided situations
+   // Not used further below; reading the length dereferences centList right
+   // away, so a null centList fails at this point.
+   final int centerCount = centList.length;
+
+   // WARNING: assignments is an "input/output-parameter" and should never be
+   // null. If it is, a local array filled with -1 is used instead; that array
+   // is not visible to the caller once this method returns.
+   if (assignments == null) {
+     assignments = new int[m_Instances.Size()];
+     java.util.Arrays.fill(assignments, -1);
+   }
+
+   // set assignments for all instances of this node
+   for (int pos = node.m_Start; pos <= node.m_End; pos++) {
+     final int instanceIndex = m_InstList[pos];
+     final Instance current = m_Instances.instance(instanceIndex);
+     assignments[instanceIndex] =
+         m_EuclideanDistance.closestPoint(current, centers, centList);
+   }
+ }
+
+ /**
+ * Properties' variables =====================================================
+ */
+
+ /**
+  * Flag for normalizing: whether the width of a node is normalized by the
+  * width of the universe (see setNormalizeNodeWidth). Initialised to true.
+  */
+ boolean m_NormalizeNodeWidth = true;
+
+ /** The euclidean distance function to use. */
+ protected EuclideanDistance m_EuclideanDistance;
+ // Instance initializer: makes sure there is only one EuclideanDistance object.
+ // If m_DistanceFunction already holds an EuclideanDistance it is reused,
+ // otherwise a fresh one is created and stored in both fields.
+ { // to make sure we have only one object of EuclideanDistance
+ if (m_DistanceFunction instanceof EuclideanDistance)
+ m_EuclideanDistance = (EuclideanDistance) m_DistanceFunction;
+ else
+ m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance();
+ }
+
+ /** minimal relative width of a KDTree rectangle (initialised to 1.0E-2). */
+ protected double m_MinBoxRelWidth = 1.0E-2;
+
+ /** maximal number of instances in a leaf (initialised to 40). */
+ protected int m_MaxInstInLeaf = 40;
+
+ /**
+ * the GET and SET - functions ===============================================
+ */
+
+ /**
+  * Describes the minBoxRelWidth property.
+  *
+  * @return the explanatory text for the property
+  */
+ public String minBoxRelWidthTipText() {
+   final String tip = "The minimum relative width of the box. A node is only made a leaf "
+       + "if the width of the split dimension of the instances in a node "
+       + "normalized over the width of the split dimension of all the "
+       + "instances is less than or equal to this minimum relative width.";
+   return tip;
+ }
+
+ /**
+  * Stores a new minimum relative box width. The value is taken as given; no
+  * range check is performed.
+  *
+  * @param i the minimum relative box width
+  */
+ public void setMinBoxRelWidth(double i) {
+   this.m_MinBoxRelWidth = i;
+ }
+
+ /**
+  * Reports the minimum relative box width currently configured.
+  *
+  * @return the minimum relative box width
+  */
+ public double getMinBoxRelWidth() {
+   return this.m_MinBoxRelWidth;
+ }
+
+ /**
+  * Describes the maxInstInLeaf property.
+  *
+  * @return the explanatory text for the property
+  */
+ public String maxInstInLeafTipText() {
+   final String tip = "The max number of instances in a leaf.";
+   return tip;
+ }
+
+ /**
+  * Stores a new upper limit for the number of instances in a leaf. The value
+  * is taken as given; no range check is performed.
+  *
+  * @param i the maximum number of instances in a leaf
+  */
+ public void setMaxInstInLeaf(int i) {
+   this.m_MaxInstInLeaf = i;
+ }
+
+ /**
+  * Reports the configured upper limit for the number of instances in a leaf.
+  *
+  * @return the maximum number of instances in a leaf
+  */
+ public int getMaxInstInLeaf() {
+   return this.m_MaxInstInLeaf;
+ }
+
+ /**
+  * Describes the normalizeNodeWidth property.
+  *
+  * The stated default matches the field initialiser of m_NormalizeNodeWidth
+  * (true); the option parser that could reset the flag is commented out.
+  *
+  * @return the explanatory text for the property
+  */
+ public String normalizeNodeWidthTipText() {
+   return "Whether if the widths of the KDTree node should be normalized "
+       + "by the width of the universe or not. "
+       + "Where, width of the node is the range of the split attribute "
+       + "based on the instances in that node, and width of the "
+       + "universe is the range of the split attribute based on all the "
+       + "instances (default: true).";
+ }
+
+ /**
+  * Switches normalization of a KDTree node's width by the width of the same
+  * dimension in the universe on or off.
+  *
+  * @param n true to use normalizing
+  */
+ public void setNormalizeNodeWidth(boolean n) {
+   this.m_NormalizeNodeWidth = n;
+ }
+
+ /**
+  * Reports whether node widths are being normalized.
+  *
+  * @return true if normalizing
+  */
+ public boolean getNormalizeNodeWidth() {
+   return this.m_NormalizeNodeWidth;
+ }
+
+ /**
+ * returns the distance function currently in use.
+ *
+ * @return the distance function
+ */
+ public DistanceFunction getDistanceFunction() {
+ return (DistanceFunction) m_EuclideanDistance;
+ }
+
+ /**
+  * Installs the distance function used for nearest neighbour search. Only an
+  * EuclideanDistance is accepted; it is stored in both m_EuclideanDistance
+  * and m_DistanceFunction.
+  *
+  * @param df the distance function to use
+  * @throws Exception if df is not an EuclideanDistance (this includes null)
+  */
+ public void setDistanceFunction(DistanceFunction df) throws Exception {
+   if (df instanceof EuclideanDistance) {
+     m_EuclideanDistance = (EuclideanDistance) df;
+     m_DistanceFunction = m_EuclideanDistance;
+     return;
+   }
+   throw new Exception("KDTree currently only works with "
+       + "EuclideanDistanceFunction.");
+ }
+
+ /**
+  * Describes the nodeSplitter property.
+  *
+  * @return the explanatory text for the property, suitable for
+  *         displaying in the explorer/experimenter gui
+  */
+ public String nodeSplitterTipText() {
+   final String tip = "The the splitting method to split the nodes of the KDTree.";
+   return tip;
+ }
+
+ /**
+ * Returns the splitting method currently in use to split the nodes of the
+ * KDTree.
+ *
+ * @return The KDTreeNodeSplitter currently in use.
+ */
+ public KDTreeNodeSplitter getNodeSplitter() {
+ return m_Splitter;
+ }
+
+ /**
+ * Sets the splitting method to use to split the nodes of the KDTree.
+ *
+ * @param splitter The KDTreeNodeSplitter to use.
+ */
+ public void setNodeSplitter(KDTreeNodeSplitter splitter) {
+ m_Splitter = splitter;
+ }
+
+ /**
+  * Stores the weights in this object and forwards the same array to the
+  * Euclidean distance function.
+  *
+  * @param weights double array of weights
+  */
+ public void SetWeights(double[] weights){
+   this.m_Weights = weights;
+   this.m_EuclideanDistance.SetWeights(weights);
+ }
+
+ /**
+ * Returns a string describing this nearest neighbour search algorithm.
+ *
+ * @return a description of the algorithm for displaying in the
+ * explorer/experimenter gui
+ */
+// public String globalInfo() {
+// return
+// "Class implementing the KDTree search algorithm for nearest "
+// + "neighbour search.\n"
+// + "The connection to dataset is only a reference. For the tree "
+// + "structure the indexes are stored in an array. \n"
+// + "Building the tree:\n"
+// + "If a node has
+ *
+ *
+ * or combinations thereof. The range is internally converted from
+ * 1-based to 0-based (so methods that set or get numbers not in string
+ * format should use 0-based numbers).
+ *
+ * @author Len Trigg (trigg@cs.waikato.ac.nz)
+ * @version $Revision: 1.18 $
+ */
+public class Range
+ implements Serializable {
+
+ /** for serialization */
+ static final long serialVersionUID = 3667337062176835900L;
+
+ /**
+  * Record the string representations of the columns to delete: one trimmed,
+  * non-empty entry per comma-separated piece handed to setRanges().
+  */
+ /*@non_null spec_public@*/Vector m_RangeStrings = new Vector();
+
+ /** Whether matching should be inverted */
+ /*@spec_public@*/ boolean m_Invert;
+
+ /**
+  * The array of flags for whether an column is selected. Built by setFlags()
+  * with length m_Upper + 1 and reset to null by setRanges().
+  */
+ /*@spec_public@*/boolean [] m_SelectFlags;
+
+ /** Store the maximum value permitted in the range. -1 indicates that
+ no upper value has been set */
+ /*@spec_public@*/ int m_Upper = -1;
+
+ /**
+  * Creates a range without any range strings; all fields keep their
+  * declared initial values.
+  */
+ //@assignable this.*;
+ public Range() {
+   // nothing to do
+ }
+
+ /**
+  * Creates a range from its string form by delegating to setRanges().
+  *
+  * @param rangeList the initial range
+  * @throws IllegalArgumentException if the range list is invalid
+  */
+ public Range(/*@non_null@*/ String rangeList) {
+   this.setRanges(rangeList);
+ }
+
+ /**
+  * Sets the value of "last" and rebuilds the selection flags. Negative
+  * values are ignored.
+  *
+  * @param newUpper the value of "last"
+  */
+ public void setUpper(int newUpper) {
+   if (newUpper < 0) {
+     return;
+   }
+   m_Upper = newUpper;
+   setFlags();
+ }
+
+ /**
+  * Tells whether the sense of the range is inverted, i.e. everything except
+  * the values named by the range string is selected.
+  *
+  * @return whether the matching sense is inverted
+  */
+ //@ensures \result <==> m_Invert;
+ public /*@pure@*/boolean getInvert() {
+   return this.m_Invert;
+ }
+
+ /**
+  * Chooses whether the sense of the range is inverted, i.e. everything
+  * except the values named by the range string is selected.
+  *
+  * @param newSetting true if the matching sense is inverted
+  */
+ public void setInvert(boolean newSetting) {
+   this.m_Invert = newSetting;
+ }
+
+ /**
+  * Rebuilds the range selection string by joining the stored range strings
+  * with commas.
+  *
+  * @return the range selection string
+  */
+ public /*@non_null pure@*/String getRanges() {
+   final int count = m_RangeStrings.size();
+   StringBuffer joined = new StringBuffer(count * 4);
+   for (int pos = 0; pos < count; pos++) {
+     if (pos > 0) {
+       joined.append(',');
+     }
+     joined.append((String) m_RangeStrings.elementAt(pos));
+   }
+   return joined.toString();
+ }
+
+ /**
+  * Sets the ranges from a string representation. The list is cut at every
+  * comma; each piece is trimmed and empty pieces are dropped. The selection
+  * flags are discarded, so setUpper() must be called afterwards for the
+  * ranges to be actually set internally.
+  *
+  * @param rangeList the comma separated list of ranges. The empty
+  * string sets the range to empty.
+  * @throws IllegalArgumentException if the rangeList was not well formed
+  */
+ //@requires rangeList != null;
+ //@assignable m_RangeStrings,m_SelectFlags;
+ public void setRanges(String rangeList) {
+   Vector pieces = new Vector (10);
+
+   final int length = rangeList.length();
+   int from = 0;
+   while (from <= length) {
+     int comma = rangeList.indexOf(',', from);
+     int to = (comma < 0) ? length : comma;
+     String piece = rangeList.substring(from, to).trim();
+     if (piece.length() > 0) {
+       pieces.addElement(piece);
+     }
+     if (comma < 0) {
+       break;
+     }
+     from = comma + 1;
+   }
+
+   m_RangeStrings = pieces;
+   m_SelectFlags = null;
+ }
+
+ /**
+  * Tells whether the supplied cardinal number is part of the current range,
+  * taking the invert setting into account.
+  *
+  * @param index the number of interest
+  * @return true if index is in the current range
+  * @throws RuntimeException if the upper limit of the range hasn't been defined
+  */
+ //@requires m_Upper >= 0;
+ //@requires 0 <= index && index < m_SelectFlags.length;
+ public /*@pure@*/ boolean isInRange(int index) {
+   if (m_Upper == -1) {
+     throw new RuntimeException("No upper limit has been specified for range");
+   }
+   // selected flag, flipped when the sense is inverted
+   return m_Invert != m_SelectFlags[index];
+ }
+
+ /**
+  * Builds a textual representation of the current range: the stored range
+  * strings, the invert setting and, when available, the selected columns.
+  * Being a string representation, the column numbers are based from 1. If
+  * the columns cannot be listed, the exception message is appended instead.
+  *
+  * @return the string representation of the current range
+  */
+ public /*@non_null pure@*/ String toString() {
+   if (m_RangeStrings.size() == 0) {
+     return "Empty";
+   }
+
+   StringBuffer text = new StringBuffer("Strings: ");
+   for (Enumeration e = m_RangeStrings.elements(); e.hasMoreElements();) {
+     text.append((String) e.nextElement()).append(' ');
+   }
+   text.append("\n");
+   text.append("Invert: ").append(m_Invert).append("\n");
+
+   try {
+     if (m_Upper == -1) {
+       throw new RuntimeException("Upper limit has not been specified");
+     }
+     // collected separately so that nothing is emitted if listing fails midway
+     StringBuffer cols = new StringBuffer();
+     for (int i = 0; i < m_SelectFlags.length; i++) {
+       if (isInRange(i)) {
+         cols.append(cols.length() == 0 ? "Cols: " : ",").append(i + 1);
+       }
+     }
+     if (cols.length() > 0) {
+       text.append(cols.toString()).append("\n");
+     }
+   } catch (Exception ex) {
+     text.append(ex.getMessage());
+   }
+   return text.toString();
+ }
+
+ /**
+ * Gets an array containing all the selected values, in the order
+ * that they were selected (or ascending order if range inversion is on)
+ *
+ * @return the array of selected values
+ * @throws RuntimeException if the upper limit of the range hasn't been defined
+ */
+ //@requires m_Upper >= 0;
+ public /*@non_null@*/ int [] getSelection() {
+
+ if (m_Upper == -1) {
+ throw new RuntimeException("No upper limit has been specified for range");
+ }
+ int [] selectIndices = new int [m_Upper + 1];
+ int numSelected = 0;
+ if (m_Invert)
+ {
+ for (int i = 0; i <= m_Upper; i++) {
+ if (!m_SelectFlags[i]) {
+ selectIndices[numSelected++] = i;
+ }
+ }
+ }
+ else
+ {
+ Enumeration enu = m_RangeStrings.elements();
+ while (enu.hasMoreElements()) {
+ String currentRange = (String)enu.nextElement();
+ int start = rangeLower(currentRange);
+ int end = rangeUpper(currentRange);
+ for (int i = start; (i <= m_Upper) && (i <= end); i++) {
+ if (m_SelectFlags[i]) {
+ selectIndices[numSelected++] = i;
+ }
+ }
+ }
+ }
+ int [] result = new int [numSelected];
+ System.arraycopy(selectIndices, 0, result, 0, numSelected);
+ return result;
+ }
+
+ /**
+  * Renders the indices in the supplied array as a range list string. Runs of
+  * consecutive indices are written as "first-last", everything else is
+  * separated by commas.
+  *
+  * @param indices an array containing indices to select.
+  * Since the array will typically come from a program, indices are assumed
+  * from 0, and thus will have 1 added in the String representation.
+  * @return the string representation of the indices
+  */
+ public static /*@non_null pure@*/String indicesToRangeList(/*@non_null@*/ int []indices) {
+   StringBuffer out = new StringBuffer();
+   if (indices.length == 0) {
+     return out.toString();
+   }
+
+   // "previous" always holds the preceding index in its 1-based form, which is
+   // exactly the 0-based value a directly following index would have.
+   int previous = indices[0] + 1;
+   out.append(previous);
+   boolean inRun = false;
+
+   for (int pos = 1; pos < indices.length; pos++) {
+     int current = indices[pos];
+     if (current == previous) {
+       inRun = true;
+     } else {
+       if (inRun) {
+         out.append('-').append(previous);
+         inRun = false;
+       }
+       out.append(',').append(current + 1);
+     }
+     previous = current + 1;
+   }
+
+   if (inRun) {
+     out.append('-').append(previous);
+   }
+   return out.toString();
+ }
+
+ /**
+  * Rebuilds the selection flags: allocates m_Upper + 1 flags and marks every
+  * index covered by one of the stored range strings, never going beyond
+  * m_Upper.
+  *
+  * @throws IllegalArgumentException if a stored range string is not valid
+  */
+ protected void setFlags() {
+   m_SelectFlags = new boolean [m_Upper + 1];
+   for (int n = 0; n < m_RangeStrings.size(); n++) {
+     String spec = (String) m_RangeStrings.elementAt(n);
+     if (!isValidRange(spec)) {
+       throw new IllegalArgumentException("Invalid range list at " + spec);
+     }
+     int from = rangeLower(spec);
+     int to = Math.min(rangeUpper(spec), m_Upper);
+     for (int i = from; i <= to; i++) {
+       m_SelectFlags[i] = true;
+     }
+   }
+ }
+
+
+ /**
+  * Translates a single string selection into its internal 0-based equivalent.
+  * The keywords "first" and "last" are matched case-insensitively and
+  * independently of the default locale; any other input is parsed as a
+  * 1-based number and clamped to 0..m_Upper.
+  *
+  * @param single the string representing the selection (eg: 1 first last)
+  * @return the number corresponding to the selected value
+  * @throws NumberFormatException if single is neither a keyword nor an integer
+  */
+ protected /*@pure@*/ int rangeSingle(/*@non_null@*/ String single) {
+
+   // A fixed locale keeps "FIRST"/"LAST" recognisable under locales with
+   // special casing rules (e.g. Turkish dotless i).
+   String keyword = single.toLowerCase(java.util.Locale.ENGLISH);
+   if (keyword.equals("first")) {
+     return 0;
+   }
+   if (keyword.equals("last")) {
+     return m_Upper;
+   }
+   int index = Integer.parseInt(single) - 1;
+   if (index < 0) {
+     index = 0;
+   }
+   if (index > m_Upper) {
+     index = m_Upper;
+   }
+   return index;
+ }
+
+ /**
+ * Translates a range into it's lower index.
+ *
+ * @param range the string representation of the range
+ * @return the lower index of the range
+ */
+ protected int rangeLower(/*@non_null@*/ String range) {
+
+ int hyphenIndex;
+ if ((hyphenIndex = range.indexOf('-')) >= 0) {
+ return Math.min(rangeLower(range.substring(0, hyphenIndex)),
+ rangeLower(range.substring(hyphenIndex + 1)));
+ }
+ return rangeSingle(range);
+ }
+
+ /**
+ * Translates a range into it's upper index. Must only be called once
+ * setUpper has been called.
+ *
+ * @param range the string representation of the range
+ * @return the upper index of the range
+ */
+ protected int rangeUpper(/*@non_null@*/ String range) {
+
+ int hyphenIndex;
+ if ((hyphenIndex = range.indexOf('-')) >= 0) {
+ return Math.max(rangeUpper(range.substring(0, hyphenIndex)),
+ rangeUpper(range.substring(hyphenIndex + 1)));
+ }
+ return rangeSingle(range);
+ }
+
+ /**
+ * Determines if a string represents a valid index or simple range.
+ * Examples: -S <classname and options>
+ * Node splitting method to use.
+ * (default: weka.core.neighboursearch.kdtrees.SlidingMidPointOfWidestSide)
+ *
+ * -W <value>
+ * Set minimal width of a box
+ * (default: 1.0E-2).
+ *
+ * -L
+ * Maximal number of instances in a leaf
+ * (default: 40).
+ *
+ * -N
+ * Normalizing will be done
+ * (Select dimension for split, with normalising to universe).
+ *
+
+ *
+ * @param options the list of options as an array of strings
+ * @throws Exception if an option is not supported
+ */
+// public void setOptions(String[] options) throws Exception {
+// super.setOptions(options);
+//
+// String optionString = Utils.getOption('S', options);
+// if (optionString.length() != 0) {
+// String splitMethodSpec[] = Utils.splitOptions(optionString);
+// if (splitMethodSpec.length == 0) {
+// throw new Exception("Invalid DistanceFunction specification string.");
+// }
+// String className = splitMethodSpec[0];
+// splitMethodSpec[0] = "";
+//
+// setNodeSplitter((KDTreeNodeSplitter) Utils.forName(
+// KDTreeNodeSplitter.class, className, splitMethodSpec));
+// }
+// else {
+// setNodeSplitter(new SlidingMidPointOfWidestSide());
+// }
+//
+// optionString = Utils.getOption('W', options);
+// if (optionString.length() != 0)
+// setMinBoxRelWidth(Double.parseDouble(optionString));
+// else
+// setMinBoxRelWidth(1.0E-2);
+//
+// optionString = Utils.getOption('L', options);
+// if (optionString.length() != 0)
+// setMaxInstInLeaf(Integer.parseInt(optionString));
+// else
+// setMaxInstInLeaf(40);
+//
+// setNormalizeNodeWidth(Utils.getFlag('N', options));
+// }
+
+ /**
+ * Gets the current settings of KDtree.
+ *
+ * @return an array of strings suitable for passing to setOptions
+ */
+// public String[] getOptions() {
+// Vector
+ *
+ * For more information see also:
+ *
+ * Ashraf Masood Kibriya (2007). Fast Algorithms for Nearest Neighbour Search. Hamilton, New Zealand.
+ *
+
+ *
+
+ * BibTeX:
+ *
+ * @mastersthesis{Kibriya2007,
+ * address = {Hamilton, New Zealand},
+ * author = {Ashraf Masood Kibriya},
+ * school = {Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato},
+ * title = {Fast Algorithms for Nearest Neighbour Search},
+ * year = {2007}
+ * }
+ *
+ *
+
+ *
+
+
+ *
+ * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
+ * @version $Revision: 1.2 $
+ */
+public class KMeansInpiredMethod
+ extends KDTreeNodeSplitter
+ {
+
+ /** Version identifier used for serialization. */
+ private static final long serialVersionUID = -866783749124714304L;
+
+ /**
+ * Returns a string describing this nearest neighbour search algorithm.
+ *
+ * @return a description of the algorithm for displaying in the
+ * explorer/experimenter gui
+ */
+// public String globalInfo() {
+// return
+// "The class that splits a node into two such that the overall sum "
+// + "of squared distances of points to their centres on both sides "
+// + "of the (axis-parallel) splitting plane is minimum.\n\n"
+// + "For more information see also:\n\n"
+// + getTechnicalInformation().toString();
+// }
+
+ /**
+ * Returns an instance of a TechnicalInformation object, containing detailed
+ * information about the technical background of this class, e.g., paper
+ * reference or book this class is based on.
+ *
+ * @return the technical information about this class
+ */
+// public TechnicalInformation getTechnicalInformation() {
+// TechnicalInformation result;
+//
+// result = new TechnicalInformation(Type.MASTERSTHESIS);
+// result.setValue(Field.AUTHOR, "Ashraf Masood Kibriya");
+// result.setValue(Field.TITLE, "Fast Algorithms for Nearest Neighbour Search");
+// result.setValue(Field.YEAR, "2007");
+// result.setValue(Field.SCHOOL, "Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato");
+// result.setValue(Field.ADDRESS, "Hamilton, New Zealand");
+//
+// return result;
+// }
+
+ /**
+ * Splits a node into two such that the overall sum of squared distances
+ * of points to their centres on both sides of the (axis-parallel)
+ * splitting plane is minimum. The two nodes created after the whole
+ * splitting are correctly initialised. And, node.left and node.right
+ * are set appropriately.
+ *
+ * For every candidate dimension the node's instances are sorted on that
+ * dimension and every cut between two neighbouring instances is scored;
+ * the cut with the smallest score over all dimensions wins and the split
+ * value is the midpoint between the two neighbours.
+ *
+ * @param node The node to split.
+ * @param numNodesCreated The number of nodes that so far have been
+ * created for the tree, so that the newly created nodes are
+ * assigned correct/meaningful node numbers/ids.
+ * @param nodeRanges The attributes' range for the points inside
+ * the node that is to be split. Not read by this implementation, which
+ * uses node.m_NodeRanges instead.
+ * @param universe The attributes' range for the whole
+ * point-space.
+ * @throws Exception If there is some problem in splitting the
+ * given node.
+ */
+ public void splitNode(KDTreeNode node, int numNodesCreated,
+ double[][] nodeRanges, double[][] universe) throws Exception {
+
+ // presumably verifies that the splitter has been set up - helper is defined elsewhere
+ correctlyInitialized();
+
+ // best split found so far; splitDim stays -1 if no dimension qualifies
+ int splitDim = -1;
+ double splitVal = Double.NEGATIVE_INFINITY;
+
+ // per-attribute running sums and sums of squares for both sides of a cut
+ double leftAttSum[] = new double[m_Instances.NumAttributes()],
+ rightAttSum[] = new double[m_Instances.NumAttributes()],
+ leftAttSqSum[] = new double[m_Instances.NumAttributes()],
+ rightAttSqSum[] = new double[m_Instances.NumAttributes()],
+ rightSqMean, leftSqMean, leftSqSum, rightSqSum,
+ minSum = Double.POSITIVE_INFINITY, val;
+
+ for (int dim = 0; dim < m_Instances.NumAttributes(); dim++) {
+ // m_MaxRelativeWidth in KDTree ensure there'll be atleast one dim with
+ // width > 0.0
+ // skip dimensions without spread in this node and the class attribute
+ if (node.m_NodeRanges[dim][WIDTH] == 0.0
+ || dim == m_Instances.GetClassIndex())
+ continue;
+
+ // order the node's slice of the index list by the candidate dimension
+ quickSort(m_Instances, m_InstList, dim, node.m_Start, node.m_End);
+
+ // First pass: reset all four accumulators (on the first instance) and put
+ // every instance of the node on the "right" side.
+ for (int i = node.m_Start; i <= node.m_End; i++) {
+ for (int j = 0; j < m_Instances.NumAttributes(); j++) {
+ if (j == m_Instances.GetClassIndex())
+ continue;
+ val = m_Instances.instance(m_InstList[i]).valueSparse(j);
+ if (m_NormalizeNodeWidth) {
+ if (Double.isNaN(universe[j][MIN])
+ || universe[j][MIN] == universe[j][MAX])
+ val = 0.0;
+ else
+ val = ((val - universe[j][MIN]) / universe[j][WIDTH]); // normalizing
+ // value
+ }
+ if (i == node.m_Start) {
+ leftAttSum[j] = rightAttSum[j] = leftAttSqSum[j] = rightAttSqSum[j] = 0.0;
+ }
+ rightAttSum[j] += val;
+ rightAttSqSum[j] += val * val;
+ }
+ }
+
+ // Second pass: move instances one by one from right to left (in sorted
+ // order) and score the cut after instance i. The last instance is never
+ // moved, so the right side always keeps at least one instance.
+ for (int i = node.m_Start; i <= node.m_End - 1; i++) {
+ Instance inst = m_Instances.instance(m_InstList[i]);
+ leftSqSum = rightSqSum = 0.0;
+ for (int j = 0; j < m_Instances.NumAttributes(); j++) {
+ if (j == m_Instances.GetClassIndex())
+ continue;
+ val = inst.GetElementAt(j);
+
+ if (m_NormalizeNodeWidth) {
+ if (Double.isNaN(universe[j][MIN])
+ || universe[j][MIN] == universe[j][MAX])
+ val = 0.0;
+ else
+ val = ((val - universe[j][MIN]) / universe[j][WIDTH]); // normalizing
+ // value
+ }
+
+ leftAttSum[j] += val;
+ rightAttSum[j] -= val;
+ leftAttSqSum[j] += val * val;
+ rightAttSqSum[j] -= val * val;
+ // despite the names these hold the mean first and its square afterwards
+ leftSqMean = leftAttSum[j] / (i - node.m_Start + 1);
+ leftSqMean *= leftSqMean;
+ rightSqMean = rightAttSum[j] / (node.m_End - i);
+ rightSqMean *= rightSqMean;
+
+ // sum of squared deviations from the mean: sum(x^2) - n * mean^2
+ leftSqSum += leftAttSqSum[j] - (i - node.m_Start + 1) * leftSqMean;
+ rightSqSum += rightAttSqSum[j] - (node.m_End - i) * rightSqMean;
+ }
+
+ // strictly smaller only: on ties the earliest cut found is kept
+ if (minSum > (leftSqSum + rightSqSum)) {
+ minSum = leftSqSum + rightSqSum;
+
+ // NOTE(review): i never reaches node.m_End in this loop, so the else
+ // branch below looks unreachable.
+ if (i < node.m_End)
+ splitVal = (m_Instances.instance(m_InstList[i]).valueSparse(dim) + m_Instances
+ .instance(m_InstList[i + 1]).valueSparse(dim)) / 2;
+ else
+ splitVal = m_Instances.instance(m_InstList[i]).valueSparse(dim);
+
+ splitDim = dim;
+ }
+ }// end for instance i
+ }// end for attribute dim
+
+ // NOTE(review): if no cut was scored (e.g. a single-instance node or no
+ // usable dimension) splitDim is still -1 here - confirm callers rule this out.
+ int rightStart = rearrangePoints(m_InstList, node.m_Start, node.m_End,
+ splitDim, splitVal);
+
+ // sanity check: neither child may be empty; print diagnostics, then fail
+ if (rightStart == node.m_Start || rightStart > node.m_End) {
+ System.out.println("node.m_Start: " + node.m_Start + " node.m_End: "
+ + node.m_End + " splitDim: " + splitDim + " splitVal: " + splitVal
+ + " node.min: " + node.m_NodeRanges[splitDim][MIN] + " node.max: "
+ + node.m_NodeRanges[splitDim][MAX] + " node.numInstances: "
+ + node.numInstances());
+
+ if (rightStart == node.m_Start)
+ throw new Exception("Left child is empty in node " + node.m_NodeNumber
+ + ". Not possible with "
+ + "KMeanInspiredMethod splitting method. Please " + "check code.");
+ else
+ throw new Exception("Right child is empty in node " + node.m_NodeNumber
+ + ". Not possible with "
+ + "KMeansInspiredMethod splitting method. Please " + "check code.");
+ }
+
+ // record the split and create both children with freshly computed ranges;
+ // the children are numbered numNodesCreated + 1 (left) and + 2 (right)
+ node.m_SplitDim = splitDim;
+ node.m_SplitValue = splitVal;
+ node.m_Left = new KDTreeNode(numNodesCreated + 1, node.m_Start,
+ rightStart - 1, m_EuclideanDistance.initializeRanges(m_InstList,
+ node.m_Start, rightStart - 1));
+ node.m_Right = new KDTreeNode(numNodesCreated + 2, rightStart, node.m_End,
+ m_EuclideanDistance
+ .initializeRanges(m_InstList, rightStart, node.m_End));
+ }
+
+ /**
+  * Partitions the instances around a pivot. Used by quicksort and
+  * kthSmallestValue. The pivot is the value of the element in the middle of
+  * the given portion; only the index array is rearranged, never the
+  * instances themselves.
+  *
+  * @param insts The instances on which the tree is (or is
+  * to be) built.
+  * @param index The master index array containing indices
+  * of the instances.
+  * @param attidx The attribute/dimension based on which
+  * the instances should be partitioned.
+  * @param l The beginning index of the portion of master index
+  * array that should be partitioned.
+  * @param r The end index of the portion of master index array
+  * that should be partitioned.
+  * @return the index of the middle element
+  */
+ protected static int partition(Instances insts, int[] index, int attidx, int l, int r) {
+
+   // l + (r - l) / 2 instead of (l + r) / 2: same midpoint for 0 <= l <= r,
+   // but the intermediate sum cannot overflow for very large index arrays.
+   double pivot = insts.instance(index[l + (r - l) / 2]).valueSparse(attidx);
+   int help;
+
+   while (l < r) {
+     // advance from the left past values smaller than the pivot
+     while ((insts.instance(index[l]).valueSparse(attidx) < pivot) && (l < r)) {
+       l++;
+     }
+     // retreat from the right past values larger than the pivot
+     while ((insts.instance(index[r]).valueSparse(attidx) > pivot) && (l < r)) {
+       r--;
+     }
+     // both cursors rest on misplaced entries: swap them and move on
+     if (l < r) {
+       help = index[l];
+       index[l] = index[r];
+       index[r] = help;
+       l++;
+       r--;
+     }
+   }
+   // cursors met on an entry that belongs to the right part
+   if ((l == r) && (insts.instance(index[r]).valueSparse(attidx) > pivot)) {
+     r--;
+   }
+
+   return r;
+ }
+
+ /**
+ * Sorts the instances according to the given attribute/dimension.
+ * The sorting is done on the master index array and not on the
+ * actual instances object.
+ *
+ * @param insts The instances on which the tree is (or is
+ * to be) built.
+ * @param indices The master index array containing indices
+ * of the instances.
+ * @param attidx The dimension/attribute based on which
+ * the instances should be sorted.
+ * @param left The begining index of the portion of the master
+ * index array that needs to be sorted.
+ * @param right The end index of the portion of the master index
+ * array that needs to be sorted.
+ */
+ protected static void quickSort(Instances insts, int[] indices, int attidx, int left, int right) {
+
+ if (left < right) {
+ int middle = partition(insts, indices, attidx, left, right);
+ quickSort(insts, indices, attidx, left, middle);
+ quickSort(insts, indices, attidx, middle + 1, right);
+ }
+ }
+
+ /**
+ * Method to validate the sorting done by quickSort().
+ *
+ * @param insts The instances on which the tree is (or is
+ * to be) built.
+ * @param indices The master index array containing indices
+ * of the instances.
+ * @param attidx The dimension/attribute based on which
+ * the instances should be sorted.
+ * @param start The start of the portion in master index
+ * array that needs to be sorted.
+ * @param end The end of the portion in master index
+ * array that needs to be sorted.
+ * @throws Exception If the indices of the instances
+ * are not in sorted order.
+ */
+ private static void checkSort(Instances insts, int[] indices, int attidx,
+ int start, int end) throws Exception {
+ for(int i=start+1; i<=end; i++) {
+ if( insts.instance(indices[i-1]).valueSparse(attidx) >
+ insts.instance(indices[i]).valueSparse(attidx) ) {
+ System.out.println("value[i-1]: "+insts.instance(indices[i-1]).valueSparse(attidx));
+ System.out.println("value[i]: "+insts.instance(indices[i]).valueSparse(attidx));
+ System.out.println("indices[i-1]: "+indices[i-1]);
+ System.out.println("indices[i]: "+indices[i]);
+ System.out.println("i: "+i);
+ if(insts.instance(indices[i-1]).valueSparse(attidx) > insts.instance(indices[i]).valueSparse(attidx))
+ System.out.println("value[i-1] > value[i]");
+
+ throw new Exception("Indices not sorted correctly.");
+ }//end if
+ }
+ }
+
+ /**
+  * Reorders the node's portion of the indices array so that all points whose
+  * value in the split dimension is <= splitVal come first and all points
+  * with a larger value come after them.
+  *
+  * @param indices The master index array.
+  * @param startidx The beginning index of portion of indices that needs
+  * re-arranging.
+  * @param endidx The end index of portion of indices that needs
+  * re-arranging.
+  * @param splitDim The split dimension/attribute.
+  * @param splitVal The split value.
+  * @return The startIdx of the points > the splitVal (the points
+  * belonging to the right child of the node).
+  */
+ protected int rearrangePoints(int[] indices, final int startidx, final int endidx,
+     final int splitDim, final double splitVal) {
+   // first slot behind the "<= splitVal" section built so far
+   int boundary = startidx;
+   for (int scan = startidx; scan <= endidx; scan++) {
+     Instance point = m_Instances.instance(indices[scan]);
+     if (!m_EuclideanDistance.valueIsSmallerEqual(point, splitDim, splitVal)) {
+       continue;
+     }
+     // pull the point into the left section
+     int displaced = indices[boundary];
+     indices[boundary] = indices[scan];
+     indices[scan] = displaced;
+     boundary++;
+   }
+   return boundary;
+ }
+
+ /**
+ * Returns the revision string.
+ *
+ * @return the revision
+ */
+// public String getRevision() {
+// return RevisionUtils.extract("$Revision: 1.2 $");
+// }
+}
diff --git a/src/gaknn/core/kdtree/NearestNeighbourSearch.java b/src/gaknn/core/kdtree/NearestNeighbourSearch.java
new file mode 100644
index 0000000..efd989c
--- /dev/null
+++ b/src/gaknn/core/kdtree/NearestNeighbourSearch.java
@@ -0,0 +1,922 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * NearestNeighbourSearch.java
+ * Copyright (C) 1999-2007 University of Waikato
+ */
+
+package gaknn.core.kdtree;
+
+//import weka.core.AdditionalMeasureProducer;
+//import weka.core.DistanceFunction;
+//import weka.core.EuclideanDistance;
+import gaknn.core.Instance;
+import gaknn.core.Instances;
+//import weka.core.Option;
+//import weka.core.OptionHandler;
+//import weka.core.RevisionHandler;
+//import weka.core.RevisionUtils;
+//import weka.core.Utils;
+
+import java.io.Serializable;
+import java.util.Enumeration;
+import java.util.Vector;
+
+/**
+ * Abstract class for nearest neighbour search. All algorithms (classes) that
+ * do nearest neighbour search should extend this class.
+ *
+ * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
+ * @version $Revision: 1.2 $
+ */
+public abstract class NearestNeighbourSearch
+ implements Serializable,AdditionalMeasureProducer
+ {
+
+ /**
+ * A class for a heap to store the nearest k neighbours to an instance.
+ * The heap also takes care of cases where multiple neighbours are the same
+ * distance away.
+ * i.e. the minimum size of the heap is k.
+ *
+ * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
+ * @version $Revision: 1.2 $
+ */
+ protected class MyHeap
+ {
+
+   /** The heap storage; slot 0 is a bookkeeping element whose index field holds the current size. */
+   MyHeapElement m_heap[] = null;
+
+   /** Side store filled by putKthNearest(). */
+   MyHeapElement m_KthNearest[] = null;
+
+   /** The number of elements currently held in m_KthNearest. */
+   int m_KthNearestSize = 0;
+
+   /** Allocation size used for m_KthNearest; doubled whenever that array is full. */
+   int initSize=10;
+
+   /**
+    * constructor.
+    *
+    * @param maxSize the maximum size of the heap
+    */
+   public MyHeap(int maxSize) {
+     // An even capacity is bumped up to the next odd number.
+     final int capacity = ((maxSize % 2) == 0) ? maxSize + 1 : maxSize;
+
+     // One extra slot, because slot 0 only carries the size counter.
+     m_heap = new MyHeapElement[capacity + 1];
+     m_heap[0] = new MyHeapElement(0, 0);
+   }
+
+   /**
+    * returns the size of the heap.
+    *
+    * @return the size
+    */
+   public int size() {
+     final MyHeapElement counter = m_heap[0];
+     return counter.index;
+   }
+
+   /**
+    * peeks at the first element.
+    *
+    * @return the first element
+    */
+   public MyHeapElement peek() {
+     final MyHeapElement top = m_heap[1];
+     return top;
+   }
+
+   /**
+    * returns the first element and removes it from the heap.
+    *
+    * @return the first element
+    * @throws Exception if no elements in heap
+    */
+   public MyHeapElement get() throws Exception {
+     final MyHeapElement counter = m_heap[0];
+     if (counter.index == 0) {
+       throw new Exception("No elements present in the heap");
+     }
+     final MyHeapElement top = m_heap[1];
+     // Move the last element to the root, shrink, then restore the heap order.
+     m_heap[1] = m_heap[counter.index];
+     counter.index--;
+     downheap();
+     return top;
+   }
+
+   /**
+    * adds the value to the heap.
+    *
+    * @param i the index
+    * @param d the distance
+    * @throws Exception if the heap gets too large
+    */
+   public void put(int i, double d) throws Exception {
+     final int newSize = m_heap[0].index + 1;
+     if (newSize > (m_heap.length - 1)) {
+       throw new Exception("the number of elements cannot exceed the "+
+           "initially set maximum limit");
+     }
+     m_heap[0].index = newSize;
+     m_heap[newSize] = new MyHeapElement(i, d);
+     upheap();
+   }
+
+   /**
+    * Puts an element by substituting it in place of
+    * the top most element.
+    *
+    * @param i the index
+    * @param d the distance
+    * @throws Exception if distance is smaller than that of the head
+    * element
+    */
+   public void putBySubstitute(int i, double d) throws Exception {
+     final MyHeapElement oldHead = get();
+     put(i, d);
+     final double newTop = m_heap[1].distance;
+     if (oldHead.distance == newTop) {
+       // The removed head ties with the new top: remember it on the side.
+       putKthNearest(oldHead.index, oldHead.distance);
+     }
+     else if (oldHead.distance > newTop) {
+       // The top distance shrank: previously stored ties are discarded.
+       m_KthNearest = null;
+       m_KthNearestSize = 0;
+       initSize = 10;
+     }
+     else if (oldHead.distance < newTop) {
+       throw new Exception("The substituted element is smaller than the "+
+           "head element. put() should have been called "+
+           "in place of putBySubstitute()");
+     }
+   }
+
+   /**
+    * returns the number of k nearest.
+    *
+    * @return the number of k nearest
+    * @see #m_KthNearestSize
+    */
+   public int noOfKthNearest() {
+     return m_KthNearestSize;
+   }
+
+   /**
+    * Stores kth nearest elements (if there are
+    * more than one).
+    * @param i the index
+    * @param d the distance
+    */
+   public void putKthNearest(int i, double d) {
+     if (m_KthNearest == null) {
+       m_KthNearest = new MyHeapElement[initSize];
+     }
+     if (m_KthNearestSize >= m_KthNearest.length) {
+       // Full: double the allocation size and copy the old contents over.
+       initSize *= 2;
+       final MyHeapElement grown[] = new MyHeapElement[initSize];
+       System.arraycopy(m_KthNearest, 0, grown, 0, m_KthNearest.length);
+       m_KthNearest = grown;
+     }
+     m_KthNearest[m_KthNearestSize] = new MyHeapElement(i, d);
+     m_KthNearestSize++;
+   }
+
+   /**
+    * returns the kth nearest element or null if none there.
+    *
+    * @return the kth nearest element
+    */
+   public MyHeapElement getKthNearest() {
+     if (m_KthNearestSize == 0) {
+       return null;
+     }
+     // Pops the most recently stored element.
+     return m_KthNearest[--m_KthNearestSize];
+   }
+
+   /**
+    * performs upheap operation for the heap
+    * to maintain its properties.
+    */
+   protected void upheap() {
+     int child = m_heap[0].index;
+     while (child > 1) {
+       final int parent = child / 2;
+       if (!(m_heap[child].distance > m_heap[parent].distance)) {
+         break;
+       }
+       // The child is farther away than its parent: swap and continue upwards.
+       final MyHeapElement moved = m_heap[child];
+       m_heap[child] = m_heap[parent];
+       m_heap[parent] = moved;
+       child = parent;
+     }
+   }
+
+   /**
+    * performs downheap operation for the heap
+    * to maintain its properties.
+    */
+   protected void downheap() {
+     int pos = 1;
+     while (true) {
+       final int size = m_heap[0].index;
+       final int left = 2 * pos;
+       final int right = left + 1;
+       final boolean hasRight = right <= size;
+       final boolean belowLeft =
+           left <= size && m_heap[pos].distance < m_heap[left].distance;
+       final boolean belowRight =
+           hasRight && m_heap[pos].distance < m_heap[right].distance;
+       if (!belowLeft && !belowRight) {
+         break;
+       }
+       // Swap with the left child unless a right child exists and the left one
+       // is not strictly farther away than it.
+       int target = left;
+       if (hasRight && !(m_heap[left].distance > m_heap[right].distance)) {
+         target = right;
+       }
+       final MyHeapElement moved = m_heap[pos];
+       m_heap[pos] = m_heap[target];
+       m_heap[target] = moved;
+       pos = target;
+     }
+   }
+
+   /**
+    * returns the total size.
+    *
+    * @return the total size
+    */
+   public int totalSize() {
+     final int inHeap = size();
+     final int onTheSide = noOfKthNearest();
+     return inHeap + onTheSide;
+   }
+
+   /**
+    * Returns the revision string.
+    *
+    * @return the revision
+    */
+//  public String getRevision() {
+//    return RevisionUtils.extract("$Revision: 1.2 $");
+//  }
+ }
+
+ /**
+ * A class for storing data about a neighboring instance.
+ *
+ * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
+ * @version $Revision: 1.2 $
+ */
+ protected class MyHeapElement
+ {
+
+   /** the index of this element. */
+   public int index;
+
+   /** the distance of this element. */
+   public double distance;
+
+   /**
+    * Creates an element holding the given index and distance.
+    *
+    * @param i the index
+    * @param d the distance
+    */
+   public MyHeapElement(int i, double d) {
+     this.index = i;
+     this.distance = d;
+   }
+
+   /**
+    * Returns the revision string.
+    *
+    * @return the revision
+    */
+//  public String getRevision() {
+//    return RevisionUtils.extract("$Revision: 1.2 $");
+//  }
+ }
+
+ /**
+ * A class for storing data about a neighboring instance.
+ *
+ * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
+ * @version $Revision: 1.2 $
+ */ //better to change this into a heap element
+ protected class NeighborNode
+ {
+
+   /** The neighbor instance. */
+   public Instance m_Instance;
+
+   /** The distance from the current instance to this neighbor. */
+   public double m_Distance;
+
+   /** A link to the next neighbor instance. */
+   public NeighborNode m_Next;
+
+   /**
+    * Create a new neighbor node that is linked to a successor.
+    *
+    * @param distance the distance to the neighbor
+    * @param instance the neighbor instance
+    * @param next the next neighbor node
+    */
+   public NeighborNode(double distance, Instance instance, NeighborNode next) {
+     this.m_Distance = distance;
+     this.m_Instance = instance;
+     this.m_Next = next;
+   }
+
+   /**
+    * Create a new neighbor node that doesn't link to any other nodes.
+    *
+    * @param distance the distance to the neighbor
+    * @param instance the neighbor instance
+    */
+   public NeighborNode(double distance, Instance instance) {
+     // Delegates with a null successor.
+     this(distance, instance, null);
+   }
+
+   /**
+    * Returns the revision string.
+    *
+    * @return the revision
+    */
+//  public String getRevision() {
+//    return RevisionUtils.extract("$Revision: 1.2 $");
+//  }
+ }
+
+ /**
+ * A class for a linked list to store the nearest k neighbours
+ * to an instance. We use a list so that we can take care of
+ * cases where multiple neighbours are the same distance away.
+ * i.e. the minimum length of the list is k.
+ *
+ * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
+ * @version $Revision: 1.2 $
+ */ //better to change this into a heap
+ protected class NeighborList
+ {
+
+   /** The first node in the list. */
+   protected NeighborNode m_First;
+
+   /** The last node in the list. */
+   protected NeighborNode m_Last;
+
+   /** The number of nodes to attempt to maintain in the list. */
+   protected int m_Length = 1;
+
+   /**
+    * Creates the neighborlist with a desired length.
+    *
+    * @param length the length of list to attempt to maintain
+    */
+   public NeighborList(int length) {
+     this.m_Length = length;
+   }
+
+   /**
+    * Gets whether the list is empty.
+    *
+    * @return true if list is empty
+    */
+   public boolean isEmpty() {
+     return null == m_First;
+   }
+
+   /**
+    * Gets the current length of the list by walking it from the first node.
+    *
+    * @return the current length of the list
+    */
+   public int currentLength() {
+     int count = 0;
+     for (NeighborNode node = m_First; node != null; node = node.m_Next) {
+       count++;
+     }
+     return count;
+   }
+
+   /**
+    * Inserts an instance neighbor into the list, maintaining the list
+    * sorted by distance.
+    *
+    * @param distance the distance to the instance
+    * @param instance the neighboring instance
+    */
+   public void insertSorted(double distance, Instance instance) {
+
+     if (isEmpty()) {
+       // The very first node is both head and tail; nothing to trim.
+       m_First = m_Last = new NeighborNode(distance, instance);
+       return;
+     }
+
+     if (distance < m_First.m_Distance) {
+       // Strictly closer than the head: becomes the new head.
+       m_First = new NeighborNode(distance, instance, m_First);
+     } else {
+       // Walk to the last node whose successor is not strictly closer.
+       NeighborNode before = m_First;
+       while ((before.m_Next != null) && (before.m_Next.m_Distance < distance)) {
+         before = before.m_Next;
+       }
+       before.m_Next = new NeighborNode(distance, instance, before.m_Next);
+       if (before.equals(m_Last)) {
+         m_Last = before.m_Next;
+       }
+     }
+
+     // Trip down the list until we've got k list elements (or more if the
+     // distance to the last elements is the same).
+     cutAfter(m_Length);
+   }
+
+   /**
+    * Prunes the list to contain the k nearest neighbors. If there are
+    * multiple neighbors at the k'th distance, all will be kept.
+    *
+    * @param k the number of neighbors to keep in the list.
+    */
+   public void pruneToK(int k) {
+
+     if (isEmpty()) {
+       return;
+     }
+     // Values below 1 are treated as 1.
+     cutAfter((k < 1) ? 1 : k);
+   }
+
+   /**
+    * Cuts the (non-empty) list after the first node that is at least the
+    * keep'th one and whose successor has a different distance.
+    *
+    * @param keep the number of nodes after which cutting may happen
+    */
+   private void cutAfter(int keep) {
+     int seen = 0;
+     for (NeighborNode node = m_First; node.m_Next != null; node = node.m_Next) {
+       seen++;
+       if ((seen >= keep) && (node.m_Distance != node.m_Next.m_Distance)) {
+         m_Last = node;
+         node.m_Next = null;
+         break;
+       }
+     }
+   }
+
+   /**
+    * Prints out the contents of the neighborlist.
+    */
+   public void printList() {
+
+     if (isEmpty()) {
+       System.out.println("Empty list");
+       return;
+     }
+     for (NeighborNode node = m_First; node != null; node = node.m_Next) {
+       System.out.println("Node: instance " + node.m_Instance
+           + ", distance " + node.m_Distance);
+     }
+     System.out.println();
+   }
+
+   /**
+    * returns the first element in the list.
+    *
+    * @return the first element
+    */
+   public NeighborNode getFirst() {
+     return this.m_First;
+   }
+
+   /**
+    * returns the last element in the list.
+    *
+    * @return the last element
+    */
+   public NeighborNode getLast() {
+     return this.m_Last;
+   }
+
+   /**
+    * Returns the revision string.
+    *
+    * @return the revision
+    */
+//  public String getRevision() {
+//    return RevisionUtils.extract("$Revision: 1.2 $");
+//  }
+ }
+
+ /** The neighbourhood of instances to find neighbours in. */
+ protected Instances m_Instances;
+
+ /** The number of neighbours to find. */
+ protected int m_kNN;
+
+ /** the distance function used. */
+ protected DistanceFunction m_DistanceFunction = new EuclideanDistance();
+
+ /** Performance statistics. */
+ protected PerformanceStats m_Stats = null;
+
+ /** Should we measure Performance. */
+ protected boolean m_MeasurePerformance = false;
+
+ /**
+ * Constructor.
+ */
+ public NearestNeighbourSearch() {
+   // A statistics object is only allocated when performance measuring is on.
+   if (m_MeasurePerformance) {
+     m_Stats = new PerformanceStats();
+   }
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param insts The set of instances that constitute the neighbourhood.
+ */
+ public NearestNeighbourSearch(Instances insts) {
+   // Run the no-argument constructor first, then remember the neighbourhood.
+   this();
+   this.m_Instances = insts;
+ }
+
+ /**
+ * Returns a string describing this nearest neighbour search algorithm.
+ *
+ * @return a description of the algorithm for displaying in the
+ * explorer/experimenter gui
+ */
+ public String globalInfo() {
+   // Fixed description text; the two literals form a single sentence pair.
+   final String description =
+       "Abstract class for nearest neighbour search. All algorithms (classes) that "
+       + "do nearest neighbour search should extend this class.";
+   return description;
+ }
+
+ /**
+ * Returns an enumeration describing the available options.
+ *
+ * @return an enumeration of all the available options.
+ */
+// public Enumeration listOptions() {
+// Vector newVector = new Vector();
+//
+// newVector.add(new Option(
+// "\tDistance function to use.\n"
+// + "\t(default: weka.core.EuclideanDistance)",
+// "A", 1,"-A
+ * first-last
+ * 1,2,3,4
+ * first last 2 first-last first-4 4-last
+ * Doesn't check that a < b for a-b
+ *
+ * @param range the string to check
+ * @return true if the range is valid
+ */
+ protected boolean isValidRange(String range) {
+
+   if (range == null) {
+     return false;
+   }
+
+   // A hyphenated range is split at its first hyphen; both parts must be valid.
+   final int hyphenIndex = range.indexOf('-');
+   if (hyphenIndex >= 0) {
+     return isValidRange(range.substring(0, hyphenIndex))
+         && isValidRange(range.substring(hyphenIndex + 1));
+   }
+
+   // Lower-case with a fixed locale: the default-locale toLowerCase() maps 'I' to a
+   // dotless i under Turkish/Azeri rules, which made "FIRST" fail to match there.
+   final String keyword = range.toLowerCase(java.util.Locale.ROOT);
+   if (keyword.equals("first") || keyword.equals("last")) {
+     return true;
+   }
+
+   // Otherwise it must be a 1-based index no larger than m_Upper + 1.
+   try {
+     final int index = Integer.parseInt(range);
+     return (index > 0) && (index <= m_Upper + 1);
+   } catch (NumberFormatException ex) {
+     // Not a number and not a keyword: invalid.
+     return false;
+   }
+ }
+
+ /**
+ * Returns the revision string.
+ *
+ * @return the revision
+ */
+// public String getRevision() {
+// return RevisionUtils.extract("$Revision: 1.18 $");
+// }
+
+ /**
+ * Main method for testing this class.
+ *
+ * @param argv one parameter: a test range specification
+ */
+ public static void main(String [] argv) {
+
+ try {
+ if (argv.length == 0) {
+ throw new Exception("Usage: Range
+ *
+ * For more information see also:
+ *
+ * David M. Mount (2006). ANN Programming Manual. College Park, MD, USA.
+ *
+
+ *
+
+ * BibTeX:
+ *
+ * @manual{Mount2006,
+ * address = {College Park, MD, USA},
+ * author = {David M. Mount},
+ * organization = {Department of Computer Science, University of Maryland},
+ * title = {ANN Programming Manual},
+ * year = {2006},
+ * HTTP = {Available from http://www.cs.umd.edu/\~mount/ANN/}
+ * }
+ *
+ *
+
+ *
+
+
+ *
+ * @author Ashraf M. Kibriya (amk14@waikato.ac.nz)
+ * @version $Revision: 1.3 $
+ */
+public class SlidingMidPointOfWidestSide
+ extends KDTreeNodeSplitter
+ {
+
+ /** for serialization. */
+ private static final long serialVersionUID = 852857628205680562L;
+
+ /** The floating point error to tolerate in finding the widest
+ * rectangular side. */
+ protected static double ERR = 0.001;
+
+ /**
+ * Returns a string describing this nearest neighbour search algorithm.
+ *
+ * @return a description of the algorithm for displaying in the
+ * explorer/experimenter gui
+ */
+// public String globalInfo() {
+// return
+// "The class that splits a node into two based on the midpoint value of "
+// + "the dimension in which the node's rectangle is widest. If after "
+// + "splitting one side is empty then it is slided towards the non-empty "
+// + "side until there is at least one point on the empty side.\n\n"
+// + "For more information see also:\n\n"
+// + getTechnicalInformation().toString();
+// }
+
+ /**
+ * Returns an instance of a TechnicalInformation object, containing detailed
+ * information about the technical background of this class, e.g., paper
+ * reference or book this class is based on.
+ *
+ * @return the technical information about this class
+ */
+// public TechnicalInformation getTechnicalInformation() {
+// TechnicalInformation result;
+//
+// result = new TechnicalInformation(Type.MANUAL);
+// result.setValue(Field.AUTHOR, "David M. Mount");
+// result.setValue(Field.YEAR, "2006");
+// result.setValue(Field.TITLE, "ANN Programming Manual");
+// result.setValue(Field.ORGANIZATION, "Department of Computer Science, University of Maryland");
+// result.setValue(Field.ADDRESS,
+// "College Park, MD, USA");
+// result.setValue(Field.HTTP,
+// "Available from http://www.cs.umd.edu/~mount/ANN/");
+//
+// return result;
+// }
+
+ /**
+ * Splits a node into two based on the midpoint value of the dimension
+ * in which the node's rectangle is widest. If after splitting one side
+ * is empty then it is slided towards the non-empty side until there is
+ * at least one point on the empty side. The two nodes created after the
+ * whole splitting are correctly initialised. And, node.left and
+ * node.right are set appropriately.
+ * @param node The node to split.
+ * @param numNodesCreated The number of nodes that so far have been
+ * created for the tree, so that the newly created nodes are
+ * assigned correct/meaningful node numbers/ids.
+ * @param nodeRanges The attributes' range for the points inside
+ * the node that is to be split.
+ * @param universe The attributes' range for the whole
+ * point-space.
+ * @throws Exception If there is some problem in splitting the
+ * given node.
+ */
+ public void splitNode(KDTreeNode node, int numNodesCreated,
+     double[][] nodeRanges, double[][] universe) throws Exception {
+
+   correctlyInitialized();
+
+   // Derive the node's rectangle from its point ranges when it has none yet.
+   if (node.m_NodesRectBounds == null) {
+     final int numDims = node.m_NodeRanges.length;
+     node.m_NodesRectBounds = new double[2][numDims];
+     for (int d = 0; d < numDims; d++) {
+       node.m_NodesRectBounds[MIN][d] = node.m_NodeRanges[d][MIN];
+       node.m_NodesRectBounds[MAX][d] = node.m_NodeRanges[d][MAX];
+     }
+   }
+
+   final int classIdx = m_Instances.GetClassIndex();
+   final double[][] rect = node.m_NodesRectBounds;
+   final int numSides = rect[0].length;
+
+   // Pass 1: width of the widest rectangle side, skipping the class attribute
+   // and attributes whose point range has no width.
+   double widestSide = Double.NEGATIVE_INFINITY;
+   for (int d = 0; d < numSides; d++) {
+     if (d == classIdx) {
+       continue;
+     }
+     double sideWidth = rect[MAX][d] - rect[MIN][d];
+     if (m_NormalizeNodeWidth) {
+       sideWidth = sideWidth / universe[d][WIDTH];
+     }
+     if (sideWidth > widestSide && node.m_NodeRanges[d][WIDTH] > 0.0) {
+       widestSide = sideWidth;
+     }
+   }
+
+   // Pass 2: among the sides within the ERR tolerance of the widest one, pick
+   // the dimension in which the points themselves are spread the most.
+   int splitDim = -1;
+   double widestSpread = Double.NEGATIVE_INFINITY;
+   for (int d = 0; d < numSides; d++) {
+     if (d == classIdx) {
+       continue;
+     }
+     double sideWidth = rect[MAX][d] - rect[MIN][d];
+     if (m_NormalizeNodeWidth) {
+       sideWidth = sideWidth / universe[d][WIDTH];
+     }
+     if (!(sideWidth >= widestSide * (1 - ERR))
+         || !(node.m_NodeRanges[d][WIDTH] > 0.0)) {
+       continue;
+     }
+     if (node.m_NodeRanges[d][WIDTH] > widestSpread) {
+       widestSpread = node.m_NodeRanges[d][WIDTH];
+       if (m_NormalizeNodeWidth) {
+         widestSpread = widestSpread / universe[d][WIDTH];
+       }
+       splitDim = d;
+     }
+   }
+
+   // Start at the midpoint of the rectangle along the chosen dimension.
+   final double rectMin = rect[MIN][splitDim];
+   final double rectMax = rect[MAX][splitDim];
+   double splitVal = rectMin + (rectMax - rectMin) * 0.5;
+
+   // might want to try to slide it further to contain more than one point on
+   // the side that is resulting empty
+   if (splitVal < node.m_NodeRanges[splitDim][MIN]) {
+     splitVal = node.m_NodeRanges[splitDim][MIN];
+   } else if (splitVal >= node.m_NodeRanges[splitDim][MAX]) {
+     splitVal = node.m_NodeRanges[splitDim][MAX]
+         - node.m_NodeRanges[splitDim][WIDTH] * 0.001;
+   }
+
+   final int rightStart = rearrangePoints(m_InstList, node.m_Start, node.m_End,
+       splitDim, splitVal);
+
+   if (rightStart == node.m_Start) {
+     throw new Exception("Left child is empty in node " + node.m_NodeNumber
+         + ". Not possible with "
+         + "SlidingMidPointofWidestSide splitting method. Please "
+         + "check code.");
+   }
+   if (rightStart > node.m_End) {
+     throw new Exception("Right child is empty in node " + node.m_NodeNumber
+         + ". Not possible with "
+         + "SlidingMidPointofWidestSide splitting method. Please "
+         + "check code.");
+   }
+
+   node.m_SplitDim = splitDim;
+   node.m_SplitValue = splitVal;
+
+   // Left child: a copy of the parent's rectangle, capped at the split value.
+   double[][] childRect = new double[2][numSides];
+   System.arraycopy(rect[MIN], 0, childRect[MIN], 0, rect[MIN].length);
+   System.arraycopy(rect[MAX], 0, childRect[MAX], 0, rect[MAX].length);
+   childRect[MAX][splitDim] = splitVal;
+
+   node.m_Left = new KDTreeNode(numNodesCreated + 1, node.m_Start,
+       rightStart - 1, m_EuclideanDistance.initializeRanges(m_InstList,
+           node.m_Start, rightStart - 1), childRect);
+
+   // Right child: a fresh copy of the parent's rectangle, starting at the split value.
+   childRect = new double[2][numSides];
+   System.arraycopy(rect[MIN], 0, childRect[MIN], 0, rect[MIN].length);
+   System.arraycopy(rect[MAX], 0, childRect[MAX], 0, rect[MAX].length);
+   childRect[MIN][splitDim] = splitVal;
+
+   node.m_Right = new KDTreeNode(numNodesCreated + 2, rightStart, node.m_End,
+       m_EuclideanDistance.initializeRanges(m_InstList, rightStart, node.m_End), childRect);
+ }
+
+ /**
+ * Re-arranges the indices array such that the points <= to the splitVal
+ * are on the left of the array and those > the splitVal are on the right.
+ *
+ * @param indices The master index array.
+ * @param startidx The beginning index of portion of indices that needs
+ * re-arranging.
+ * @param endidx The end index of portion of indices that needs
+ * re-arranging.
+ * @param splitDim The split dimension/attribute.
+ * @param splitVal The split value.
+ * @return The startIdx of the points > the splitVal (the points
+ * belonging to the right child of the node).
+ */
+ protected int rearrangePoints(int[] indices, final int startidx,
+     final int endidx, final int splitDim, final double splitVal) {
+
+   // Last slot of the "<= splitVal" partition built so far; empty to begin with.
+   int lastLeft = startidx - 1;
+   for (int pos = startidx; pos <= endidx; pos++) {
+     final boolean goesLeft = m_EuclideanDistance.valueIsSmallerEqual(
+         m_Instances.instance(indices[pos]), splitDim, splitVal);
+     if (!goesLeft) {
+       continue; // stays in the right-hand portion
+     }
+     // Grow the left partition by one slot and swap the current index into it.
+     lastLeft++;
+     final int displaced = indices[lastLeft];
+     indices[lastLeft] = indices[pos];
+     indices[pos] = displaced;
+   }
+   // First position after the left partition, i.e. where the right portion starts.
+   return lastLeft + 1;
+ }
+
+ /**
+ * Returns the revision string.
+ *
+ * @return the revision
+ */
+// public String getRevision() {
+// return RevisionUtils.extract("$Revision: 1.3 $");
+// }
+}
diff --git a/src/gaknn/core/kdtree/TreePerformanceStats.java b/src/gaknn/core/kdtree/TreePerformanceStats.java
new file mode 100644
index 0000000..9091964
--- /dev/null
+++ b/src/gaknn/core/kdtree/TreePerformanceStats.java
@@ -0,0 +1,322 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * TreePerformanceStats.java
+ * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
+ */
+
+package gaknn.core.kdtree;
+
+//import weka.core.RevisionUtils;
+
+import java.util.Enumeration;
+import java.util.Vector;
+
+/**
+ * The class that measures the performance of a tree based
+ * nearest neighbour search algorithm.
+ *
+ * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
+ * @version $Revision: 1.2 $
+ */
+ public class TreePerformanceStats
+   extends PerformanceStats {
+
+   /** for serialization. */
+   private static final long serialVersionUID = -6637636693340810373L;
+
+   // Variables for leaves
+   /** The min and max number of leaf nodes looked
+    * at for a query by the tree based NNS algorithm. */
+   protected int m_MinLeaves, m_MaxLeaves;
+
+   /** The sum of leaf nodes looked
+    * at for all the queries.
+    */
+   protected int m_SumLeaves;
+   /** The squared sum of leaf nodes looked
+    * at for all the queries.
+    */
+   protected int m_SumSqLeaves;
+   /** The number of leaf nodes looked at
+    * for the current/last query.
+    */
+   protected int m_LeafCount;
+
+   // Variables for internal nodes
+   /** The min and max number of internal nodes looked
+    * at for a query by the tree based NNS algorithm. */
+   protected int m_MinIntNodes, m_MaxIntNodes;
+   /** The sum of internal nodes looked
+    * at for all the queries.
+    */
+   protected int m_SumIntNodes;
+   /** The squared sum of internal nodes looked
+    * at for all the queries.
+    */
+   protected int m_SumSqIntNodes;
+   /** The number of internal nodes looked at
+    * for the current/last query.
+    */
+   protected int m_IntNodeCount;
+
+   /**
+    * Default constructor.
+    */
+   public TreePerformanceStats() {
+     reset();
+   }
+
+   /**
+    * Resets all internal fields/counters.
+    */
+   @Override
+   public void reset() {
+     super.reset();
+     //initializing leaf variables
+     m_SumLeaves = m_SumSqLeaves = m_LeafCount = 0;
+     m_MinLeaves = Integer.MAX_VALUE;
+     m_MaxLeaves = Integer.MIN_VALUE;
+     //initializing internal variables
+     m_SumIntNodes = m_SumSqIntNodes = m_IntNodeCount = 0;
+     m_MinIntNodes = Integer.MAX_VALUE;
+     m_MaxIntNodes = Integer.MIN_VALUE;
+   }
+
+   /**
+    * Signals start of the nearest neighbour search.
+    * Resets the per-query leaf and internal node counters.
+    */
+   @Override
+   public void searchStart() {
+     super.searchStart();
+     m_LeafCount = 0;
+     m_IntNodeCount = 0;
+   }
+
+   /**
+    * Signals end of the nearest neighbour search.
+    * Folds the per-query counters into the running sums, squared sums,
+    * minima and maxima.
+    */
+   @Override
+   public void searchFinish() {
+     super.searchFinish();
+     //updating stats for leaf nodes
+     // NOTE(review): the squared sums are kept in int fields and can still
+     // overflow for very large counts; widening them would change the
+     // protected field types, so they are left as they are.
+     m_SumLeaves += m_LeafCount;
+     m_SumSqLeaves += m_LeafCount*m_LeafCount;
+     if (m_LeafCount < m_MinLeaves) {
+       m_MinLeaves = m_LeafCount;
+     }
+     if (m_LeafCount > m_MaxLeaves) {
+       m_MaxLeaves = m_LeafCount;
+     }
+     //updating stats for internal nodes
+     m_SumIntNodes += m_IntNodeCount;
+     m_SumSqIntNodes += m_IntNodeCount*m_IntNodeCount;
+     if (m_IntNodeCount < m_MinIntNodes) {
+       m_MinIntNodes = m_IntNodeCount;
+     }
+     if (m_IntNodeCount > m_MaxIntNodes) {
+       m_MaxIntNodes = m_IntNodeCount;
+     }
+   }
+
+   /**
+    * Increments the leaf count.
+    */
+   public void incrLeafCount() {
+     m_LeafCount++;
+   }
+
+   /**
+    * Increments the internal node count.
+    */
+   public void incrIntNodeCount() {
+     m_IntNodeCount++;
+   }
+
+   // Getter functions for leaves
+
+   /**
+    * Returns the total number of leaves visited.
+    *
+    * @return The total number.
+    */
+   public int getTotalLeavesVisited() {
+     return m_SumLeaves;
+   }
+
+   /**
+    * Returns the mean of number of leaves visited.
+    *
+    * @return The mean number of leaves visited.
+    */
+   public double getMeanLeavesVisited() {
+     return m_SumLeaves/(double)m_NumQueries;
+   }
+
+   /**
+    * Returns the standard deviation of leaves visited.
+    *
+    * @return The standard deviation of leaves visited.
+    */
+   public double getStdDevLeavesVisited() {
+     // Widen before squaring: the former int*int product overflowed once the
+     // sum exceeded 46340, corrupting the result.
+     final double sum = m_SumLeaves;
+     return Math.sqrt((m_SumSqLeaves - (sum*sum)/(double)m_NumQueries)/(m_NumQueries-1));
+   }
+
+   /**
+    * Returns the minimum number of leaves visited.
+    *
+    * @return The minimum number of leaves visited.
+    */
+   public int getMinLeavesVisited() {
+     return m_MinLeaves;
+   }
+
+   /**
+    * Returns the maximum number of leaves visited.
+    *
+    * @return The maximum number of leaves visited.
+    */
+   public int getMaxLeavesVisited() {
+     return m_MaxLeaves;
+   }
+
+   // Getter functions for internal nodes
+
+   /**
+    * Returns the total number of internal nodes visited.
+    *
+    * @return The total number of internal nodes visited.
+    */
+   public int getTotalIntNodesVisited() {
+     return m_SumIntNodes;
+   }
+
+   /**
+    * Returns the mean of internal nodes visited.
+    *
+    * @return The mean number of internal nodes
+    * visited.
+    */
+   public double getMeanIntNodesVisited() {
+     return m_SumIntNodes/(double)m_NumQueries;
+   }
+
+   /**
+    * Returns the standard deviation of internal nodes visited.
+    *
+    * @return The standard deviation of internal nodes visited.
+    */
+   public double getStdDevIntNodesVisited() {
+     // Widen before squaring: the former int*int product overflowed once the
+     // sum exceeded 46340, corrupting the result.
+     final double sum = m_SumIntNodes;
+     return Math.sqrt((m_SumSqIntNodes - (sum*sum)/(double)m_NumQueries)/(m_NumQueries-1));
+   }
+
+   /**
+    * Returns the minimum of internal nodes visited.
+    *
+    * @return The minimum of internal nodes visited.
+    */
+   public int getMinIntNodesVisited() {
+     return m_MinIntNodes;
+   }
+
+   /**
+    * returns the maximum of internal nodes visited.
+    *
+    * @return The maximum of internal nodes visited.
+    */
+   public int getMaxIntNodesVisited() {
+     return m_MaxIntNodes;
+   }
+
+   /**
+    * Returns an enumeration of the additional measure names: the names
+    * reported by the superclass followed by the node and leaf measures.
+    *
+    * @return An enumeration of the measure names.
+    */
+   @Override
+   public Enumeration enumerateMeasures() {
+     Vector newVector = new Vector();
+
+     Enumeration en = super.enumerateMeasures();
+     while (en.hasMoreElements()) {
+       newVector.addElement(en.nextElement());
+     }
+
+     //internal node stats
+     newVector.addElement("measureTotal_nodes_visited");
+     newVector.addElement("measureMean_nodes_visited");
+     newVector.addElement("measureStdDev_nodes_visited");
+     newVector.addElement("measureMin_nodes_visited");
+     newVector.addElement("measureMax_nodes_visited");
+     //leaf stats
+     newVector.addElement("measureTotal_leaves_visited");
+     newVector.addElement("measureMean_leaves_visited");
+     newVector.addElement("measureStdDev_leaves_visited");
+     newVector.addElement("measureMin_leaves_visited");
+     newVector.addElement("measureMax_leaves_visited");
+
+     return newVector.elements();
+   }
+
+   /**
+    * Returns the value of the named measure. Names are matched ignoring
+    * case; names not handled here are passed on to the superclass.
+    *
+    * @param additionalMeasureName The name of the measure to query for
+    * its value.
+    * @return The value of the named measure.
+    * @throws IllegalArgumentException If the named measure is not
+    * supported.
+    */
+   @Override
+   public double getMeasure(String additionalMeasureName) throws IllegalArgumentException {
+     //internal node stats
+     if (additionalMeasureName.equalsIgnoreCase("measureTotal_nodes_visited")) {
+       return getTotalIntNodesVisited();
+     } else if (additionalMeasureName.equalsIgnoreCase("measureMean_nodes_visited")) {
+       return getMeanIntNodesVisited();
+     } else if (additionalMeasureName.equalsIgnoreCase("measureStdDev_nodes_visited")) {
+       return getStdDevIntNodesVisited();
+     } else if (additionalMeasureName.equalsIgnoreCase("measureMin_nodes_visited")) {
+       return getMinIntNodesVisited();
+     } else if (additionalMeasureName.equalsIgnoreCase("measureMax_nodes_visited")) {
+       return getMaxIntNodesVisited();
+     }
+     //leaf stats
+     else if (additionalMeasureName.equalsIgnoreCase("measureTotal_leaves_visited")) {
+       return getTotalLeavesVisited();
+     } else if (additionalMeasureName.equalsIgnoreCase("measureMean_leaves_visited")) {
+       return getMeanLeavesVisited();
+     } else if (additionalMeasureName.equalsIgnoreCase("measureStdDev_leaves_visited")) {
+       return getStdDevLeavesVisited();
+     } else if (additionalMeasureName.equalsIgnoreCase("measureMin_leaves_visited")) {
+       return getMinLeavesVisited();
+     } else if (additionalMeasureName.equalsIgnoreCase("measureMax_leaves_visited")) {
+       return getMaxLeavesVisited();
+     } else {
+       return super.getMeasure(additionalMeasureName);
+     }
+   }
+
+   /**
+    * Returns a string representation of the statistics: the superclass
+    * statistics followed by one line for leaves and one for internal nodes
+    * (min, max, total, mean, standard deviation).
+    *
+    * @return The statistics as string.
+    */
+   @Override
+   public String getStats() {
+     StringBuilder buf = new StringBuilder(super.getStats());
+
+     buf.append("leaves: ").append(getMinLeavesVisited())
+        .append(", ").append(getMaxLeavesVisited())
+        .append(",").append(getTotalLeavesVisited())
+        .append(",").append(getMeanLeavesVisited())
+        .append(", ").append(getStdDevLeavesVisited())
+        .append("\n");
+     buf.append("Int nodes: ").append(getMinIntNodesVisited())
+        .append(", ").append(getMaxIntNodesVisited())
+        .append(",").append(getTotalIntNodesVisited())
+        .append(",").append(getMeanIntNodesVisited())
+        .append(", ").append(getStdDevIntNodesVisited())
+        .append("\n");
+
+     return buf.toString();
+   }
+
+   /**
+    * Returns the revision string.
+    *
+    * @return the revision
+    */
+//  public String getRevision() {
+//    return RevisionUtils.extract("$Revision: 1.2 $");
+//  }
+ }
diff --git a/src/gaknn/predictor/PredictorKdtree.java b/src/gaknn/predictor/PredictorKdtree.java
new file mode 100644
index 0000000..a164d70
--- /dev/null
+++ b/src/gaknn/predictor/PredictorKdtree.java
@@ -0,0 +1,129 @@
+package gaknn.predictor;
+
+import gaknn.core.Instance;
+import gaknn.core.Instances;
+import gaknn.core.Pair;
+import gaknn.core.kdtree.KDTree;
+import gaknn.similarity.AbstractSimilarity;
+
+public class PredictorKdtree extends Predictor {
+ KDTree kdTree;
+
+ public PredictorKdtree(AbstractSimilarity sim, Instance[] trSet, Instances inst,double[] weights) {
+ // sim and trSet are handed to the Predictor constructor; inst and weights
+ // are used for the KDTree held in kdTree.
+ super(sim, trSet);
+ kdTree=new KDTree(inst);
+ // The weights are set on the tree before setInstances() is called.
+ kdTree.SetWeights(weights);
+ try {
+ // presumably this is where the tree is actually built -- TODO confirm against KDTree
+ kdTree.setInstances(inst);
+
+ } catch (Exception e) {
+ // TODO Auto-generated catch block
+ // NOTE(review): the failure is only printed; construction still completes, so
+ // kdTree is left in whatever state setInstances() reached -- confirm this is intended.
+ System.out.println("building error kd tree");
+ e.printStackTrace();
+ }
+ // TODO Auto-generated constructor stub
+ }
+ /** Gets the attribute values of the instance and finds the majority class value confidence from its k nearest neighbors in the kd tree.
+
+ * Finds the k nearest neighbors from the kd tree, tallies the vote for each class value and returns the majority class value confidence. */
+ @Override
+ public double Predict(Instance instance) {
+ Instances kNeighbours=new Instances(null, m_K);
+ // TODO Auto-generated method stub
+ double[] vote = new double[m_ClassList.length];
+ int ClassIndex = 0;
+ try {
+ // get the k nearest neighbors form kd tree in a form of instances
+ kNeighbours=kdTree.kNearestNeighbours(instance, m_K);
+ for (int i=0; i