Скачиваний:
28
Добавлен:
01.05.2014
Размер:
15.97 Кб
Скачать
unit J48TreeClass;

interface

uses

DistributionClass,
J48SplitClass,
SplitClass,
NoSplitClass,
ModelSelector,
DmmTypes,
Utils,
Stats,
Classes,
FastVector,
Attribute,
Debugger,
RuleItem,
ItemSet,
DmmConstants,
Instances;

// A node of a J48 (C4.5-style) decision tree. Each node holds its local split
// model and references to its sons; the root is built with buildClassifier.
type J48Tree = class

private

// The model selection method (used by buildTree via selectBinModel).
m_toSelectModel: MSelector;

// References to sons.
m_sons: array of J48Tree;

// Count of sons. Set by buildTree and used as the loop bound whenever
// m_sons is iterated.
m_sonsNumber: integer;

// True if node is empty: set by buildTree for a leaf whose data has a zero
// sum of weights, cleared by newDistribution when instances reach the leaf.
m_isEmpty: boolean;

// The training instances.
m_train: TDMInstances;

// The pruning instances. Within this unit the field is only ever assigned nil.
m_test: Distribution;

// The id for the node.
m_id: integer;

//True if the tree is to be pruned.
m_pruneTheTree: boolean;

//The confidence factor for pruning (passed to Stats.addErrs).
// The value in the trailing comment is not applied by this declaration.
m_CF: double; //= 0.25f;

// Is subtree raising to be performed?
// The value in the trailing comment is not applied by this declaration.
m_subtreeRaising: boolean; //= true;

// Cleanup after the tree has been built.
// The value in the trailing comment is not applied by this declaration.
m_cleanup: boolean; //= true;

//True if node is leaf.
m_isLeaf: boolean;

// Local model at node.
m_localModel: Split;

//Collapses a tree to a node if training error doesn't increase.
procedure collapse();

// Builds the tree structure.
// @param data the data for which the tree structure is to be generated.
// @param keepData is training data to be kept?
procedure buildTree(data: TDMInstances; keepData: boolean);

// Prunes the subtree rooted at this node: sons are pruned first, then the
// node is replaced by a leaf or by its largest branch when the estimated
// error of that alternative is not worse (within a tolerance of 0.1).
procedure prune();

// Replaces m_train with justHeaderInfo and clears m_test on this node and,
// for non-leaf nodes, on all sons.
procedure cleanup(justHeaderInfo: TDMInstances);

// Returns the number of incorrectly classified training instances, summed
// over the leaves below this node.
function getTrainingErrors(): double;

// Creates a son node with the same settings as this node and builds its
// subtree from data.
function getNewTree(data: TDMInstances): J48Tree;

// Returns the estimated errors this subtree would make on data.
function getEstimatedErrorsForBranch(data: TDMInstances): double;

// Returns the estimated errors for one distribution: its incorrect count
// plus Stats.addErrs(total, incorrect, m_CF); 0 when the total is 0.
function getEstimatedErrorsForDistribution(theDistribution: Distribution): double;

// Returns the estimated errors of this subtree, summed over its leaves.
function getEstimatedErrors(): double;

// Resets the local model's distribution to data, stores data as m_train and
// propagates the split subsets to the sons.
procedure newDistribution(data: TDMInstances);

// Returns the number of leaves below (or at) this node.
function numLeaves(): integer;

// Returns the number of nodes in this subtree, this node included.
function numNodes(): integer;


public

// Stores the model selector and the pruning/cleanup settings in the fields.
constructor create(selector: MSelector; pruneTree: boolean; cf: double;
raiseTree: boolean; cleanup: boolean);

//Method for building a pruneable classifier tree.
procedure buildClassifier(data: TDMInstances);

// Returns the local split model of this node (m_localModel).
function localModel(): Split;

// Returns a textual rendering of the tree rooted at this node.
function toString(): String;

// Appends the textual rendering of this node's branches to text and returns
// the result; depth controls how many '| ' prefixes each line receives.
function dumpTree(depth: integer; text: String): String;

// Appends one rule per leaf below this node to ruleList and returns the list.
// Both arguments may be nil, in which case they are created here.
function dumpRules(ruleList: TList; ruleItem: TDMRuleItem): TList;

// Returns a new list with the rules of this tree; also writes them to a
// debug log via Debugger.writeLn.
function makeRules(): TList;

end;

implementation

uses Math, SysUtils;

// Creates a tree node and records its configuration.
//   selector  - model selector later used by buildTree to pick the split.
//   pruneTree - when true, buildClassifier calls prune after building.
//   cf        - confidence factor handed to Stats.addErrs during error estimation.
//   raiseTree - enables subtree raising in prune; also passed as keepData to buildTree.
//   cleanup   - when true, buildClassifier calls cleanup once the tree is finished.
constructor J48Tree.create(selector: MSelector; pruneTree: boolean; cf: double;
  raiseTree: boolean; cleanup: boolean);
begin
  // Pruning-related settings.
  Self.m_CF := cf;
  Self.m_pruneTheTree := pruneTree;
  Self.m_subtreeRaising := raiseTree;

  // Build-related settings. Inside this constructor "cleanup" is the parameter.
  Self.m_cleanup := cleanup;
  Self.m_toSelectModel := selector;
end;



// Builds a pruneable classifier tree from data.
// Steps: drop instances without a class value, grow the tree, collapse
// subtrees that do not reduce the training error, optionally prune, and
// optionally replace the stored training data by TDMInstances.Create(data, 0).
// @param data the training instances.
// NOTE(review): no copy of data is made here, so deleteWithMissingClass is
// applied to the caller's object - confirm that callers expect this.
procedure J48Tree.buildClassifier(data: TDMInstances);
begin
  data.deleteWithMissingClass();
  // Training data is kept on the nodes only when subtree raising needs it.
  buildTree(data, m_subtreeRaising);
  collapse();
  if (m_pruneTheTree) then
    prune();
  if (m_cleanup) then
    cleanup(TDMInstances.Create(data, 0));
end;


// Collapses a tree to a node if training error doesn't increase.
// A non-leaf node becomes a leaf when the training errors of its subtree are
// not smaller (within 1E-3) than the errors of the node's own distribution;
// otherwise the check is repeated on every son.
procedure J48Tree.collapse();
var
  errorsOfSubtree: double;
  errorsOfTree: double;
  i: integer;
  model: J48Split;
begin
  if (m_isLeaf) then
    exit;

  model := m_localModel as J48Split;
  errorsOfSubtree := getTrainingErrors();
  errorsOfTree := model.getDistribution().numIncorrect();

  if (errorsOfSubtree >= errorsOfTree - 1E-3) then
  begin
    // Drop the sons and keep the stored count in step with the array so the
    // node never advertises sons it no longer has.
    m_sons := nil;
    m_sonsNumber := 0;
    m_isLeaf := true;
    // Get NoSplit Model for tree.
    // NOTE(review): NoSplit(...) is written as a typecast of the distribution,
    // not as a constructor call - confirm this is what NoSplitClass expects.
    m_localModel := NoSplit(localModel().getDistribution());
  end
  else
    for i := 0 to m_sonsNumber - 1 do
      m_sons[i].collapse();
end;


// Builds the tree structure.
// Selects a local model for data; when that model has more than one subset
// the data is split and one son is built per subset, otherwise the node
// becomes a leaf (flagged empty when the data has a zero sum of weights).
// @param data the data for which the tree structure is to be generated.
// @param keepData is training data to be kept?
procedure J48Tree.buildTree(data: TDMInstances; keepData: boolean);
var
  i: integer;
  subsetCount: integer;
  localInstances: TDMInstanceArray;
  model: J48Split;
begin
  if (keepData) then
    m_train := data;

  // Reset the node state; the son count is cleared together with the array.
  m_test := nil;
  m_isLeaf := false;
  m_isEmpty := false;
  m_sons := nil;
  m_sonsNumber := 0;
  m_localModel := m_toSelectModel.selectBinModel(data);

  if (m_localModel.numSubsets() > 1) then
  begin
    model := m_localModel as J48Split;
    localInstances := model.split(data);
    subsetCount := model.numSubsets();
    SetLength(m_sons, subsetCount);
    m_sonsNumber := subsetCount;
    for i := 0 to subsetCount - 1 do
    begin
      m_sons[i] := getNewTree(localInstances[i]);
      // Release this node's reference to the subset once the son is built.
      localInstances[i] := nil;
    end;
  end
  else
  begin
    m_isLeaf := true;
    if (Utils.eq(data.sumOfWeights(), 0)) then
      m_isEmpty := true;
  end;
end;


// Creates a son node that inherits this node's selector and pruning settings,
// builds its subtree from data and returns it.
// @param data the instances the new subtree is built from.
function J48Tree.getNewTree(data: TDMInstances): J48Tree;
begin
  Result := J48Tree.create(m_toSelectModel, m_pruneTheTree, m_CF,
    m_subtreeRaising, m_cleanup);
  // Training data is kept on the son only when subtree raising is enabled.
  Result.buildTree(data, m_subtreeRaising);
end;


// Prunes the subtree rooted at this node.
// Sons are pruned first. Then three estimated error values are compared:
// the node as a leaf, the whole subtree, and (only when subtree raising is
// enabled) the largest branch applied to this node's training data.
// The node becomes a leaf, or is replaced by its largest branch, when that
// alternative is not worse than the current subtree within a tolerance of 0.1.
procedure J48Tree.prune();
var
  errorsLargestBranch: double;
  errorsLeaf: double;
  errorsTree: double;
  indexOfLargestBranch: integer;
  largestBranch: J48Tree;
  i: integer;
begin
  if (m_isLeaf) then
    exit;

  // Prune all subtrees.
  for i := 0 to m_sonsNumber - 1 do
    m_sons[i].prune();

  // Compute error for largest branch.
  indexOfLargestBranch := m_localModel.getDistribution().maxBag();
  if (m_subtreeRaising) then
    errorsLargestBranch :=
      m_sons[indexOfLargestBranch].getEstimatedErrorsForBranch(m_train)
  else
    // Without subtree raising the largest branch can never win the comparison.
    errorsLargestBranch := MaxDouble;

  // Compute error if this tree would be a leaf.
  errorsLeaf := getEstimatedErrorsForDistribution(m_localModel.getDistribution());
  // Compute error for the whole subtree.
  errorsTree := getEstimatedErrors();

  // Decide if leaf is best choice.
  if (Utils.smOrEq(errorsLeaf, errorsTree + 0.1) and
      Utils.smOrEq(errorsLeaf, errorsLargestBranch + 0.1)) then
  begin
    // Drop the sons and keep the stored count in step with the array.
    m_sons := nil;
    m_sonsNumber := 0;
    m_isLeaf := true;
    // Get NoSplit Model for node.
    // NOTE(review): NoSplit(...) is written as a typecast of the distribution,
    // not as a constructor call - confirm this is what NoSplitClass expects.
    m_localModel := NoSplit(m_localModel.getDistribution());
    exit;
  end;

  // Decide if largest branch is better choice than whole subtree.
  if (Utils.smOrEq(errorsLargestBranch, errorsTree + 0.1)) then
  begin
    largestBranch := m_sons[indexOfLargestBranch];
    m_sons := largestBranch.m_sons;
    // The adopted branch may have a different number of sons than this node
    // had; take the count from the array itself so later loops stay in range.
    m_sonsNumber := Length(m_sons);
    m_localModel := largestBranch.localModel();
    m_isLeaf := largestBranch.m_isLeaf;
    newDistribution(m_train);
    prune();
  end;
end;


// Recomputes the distributions of this subtree for a new set of instances.
// The local model's distribution is reset to data and data becomes m_train.
// A leaf only clears its empty flag when data carries weight; an inner node
// splits data with its local model and hands each subset to the matching son.
// @param data the instances to redistribute over the subtree.
procedure J48Tree.newDistribution(data: TDMInstances);
var
  subsets: TDMInstanceArray;
  sonIdx: integer;
begin
  m_localModel.resetDistribution(data);
  m_train := data;

  if (m_isLeaf) then
  begin
    // Check whether there are some instances at the leaf now!
    if (not Utils.eq(data.sumOfWeights(), 0)) then
      m_isEmpty := false;
    exit;
  end;

  subsets := m_localModel.split(data);
  for sonIdx := 0 to m_sonsNumber - 1 do
    m_sons[sonIdx].newDistribution(subsets[sonIdx]);
end;

// Accessor for the split model stored at this node.
// @return the value of m_localModel.
function J48Tree.localModel(): Split;
begin
  Result := Self.m_localModel;
end;


// Estimates the errors for a single distribution.
// @param theDistribution the distribution to evaluate.
// @return 0 when the distribution's total is 0; otherwise its incorrect count
//         plus Stats.addErrs(total, incorrect, m_CF).
function J48Tree.getEstimatedErrorsForDistribution(theDistribution: Distribution): double;
begin
  if (not Utils.eq(theDistribution.total(), 0)) then
    Result := theDistribution.numIncorrect() +
      Stats.addErrs(theDistribution.total(),
        theDistribution.numIncorrect(), m_CF)
  else
    // Nothing to estimate for an empty distribution.
    Result := 0;
end;



// Estimates the errors this subtree would make on data.
// A leaf evaluates a temporary distribution built from data. An inner node
// splits data with its local model and sums the estimates of its sons; the
// local model's own distribution is saved before and put back after the split.
// @param data the instances to evaluate the branch on.
// @return the summed estimated errors.
function J48Tree.getEstimatedErrorsForBranch(data: TDMInstances): double;
var
  localInstances: TDMInstanceArray;
  i: integer;
  savedDist: Distribution;
  leafDist: Distribution;
  model: J48Split;
begin
  if (m_isLeaf) then
  begin
    // The distribution is only needed for this one estimate, so release it
    // afterwards instead of leaking one object per evaluated leaf.
    leafDist := Distribution.createI(data);
    try
      Result := getEstimatedErrorsForDistribution(leafDist);
    finally
      leafDist.Free;
    end;
    exit;
  end;

  model := m_localModel as J48Split;
  savedDist := m_localModel.getDistribution();
  model.resetDistribution(data);
  localInstances := model.split(data);
  // Put back the distribution that was in place before resetDistribution.
  m_localModel.setDistribution(savedDist);

  Result := 0;
  for i := 0 to m_sonsNumber - 1 do
    Result := Result + m_sons[i].getEstimatedErrorsForBranch(localInstances[i]);
end;

// Estimated errors of the subtree rooted at this node.
// @return for a leaf, the estimate of its local model's distribution;
//         otherwise the sum of the estimates of all sons.
function J48Tree.getEstimatedErrors(): double;
var
  sonIdx: integer;
  total: double;
begin
  if (m_isLeaf) then
  begin
    Result := getEstimatedErrorsForDistribution(m_localModel.getDistribution());
    exit;
  end;

  total := 0;
  for sonIdx := 0 to m_sonsNumber - 1 do
    total := total + m_sons[sonIdx].getEstimatedErrors();
  Result := total;
end;



// Replaces the stored training data of the whole subtree.
// Every node gets justHeaderInfo as m_train and nil as m_test; inner nodes
// forward the same object to each son.
// @param justHeaderInfo the instances object to store on every node.
procedure J48Tree.cleanup(justHeaderInfo: TDMInstances);
var
  sonIdx: integer;
begin
  m_test := nil;
  m_train := justHeaderInfo;

  if (m_isLeaf) then
    exit;

  for sonIdx := 0 to m_sonsNumber - 1 do
    m_sons[sonIdx].cleanup(justHeaderInfo);
end;


// Training errors of the subtree rooted at this node.
// @return for a leaf, numIncorrect of its local model's distribution;
//         otherwise the sum of the training errors of all sons.
function J48Tree.getTrainingErrors(): double;
var
  errors: double;
  i: integer;
begin
  if (m_isLeaf) then
  begin
    Result := m_localModel.getDistribution().numIncorrect();
    exit;
  end;

  errors := 0;
  for i := 0 to m_sonsNumber - 1 do
    errors := errors + m_sons[i].getTrainingErrors();
  Result := errors;
end;


// Counts the leaves of the subtree rooted at this node.
// @return 1 for a leaf, otherwise the sum of the leaf counts of all sons.
function J48Tree.numLeaves(): integer;
var
  num: integer;
  i: integer;
begin
  if (m_isLeaf) then
  begin
    Result := 1;
    exit;
  end;

  num := 0;
  // m_sons is zero-based with m_sonsNumber entries, so the last valid index
  // is m_sonsNumber - 1.
  for i := 0 to m_sonsNumber - 1 do
    num := num + m_sons[i].numLeaves();
  Result := num;
end;

// Collects one rule per leaf below this node.
// For every son a copy of ruleItem is extended with the branch condition
// (model.ruleSeting). A leaf son completes the rule with model.dumpRule and a
// copy of it is appended to ruleList; an inner son is visited recursively.
// @param ruleList list receiving the rules; created here when nil.
// @param ruleItem rule prefix collected so far; when nil a rule whose item
//        sets are filled with IMIN_VALUE is created.
// @return the list holding the collected TDMRuleItem objects.
function J48Tree.dumpRules(ruleList: TList; ruleItem: TDMRuleItem): TList;
const
  INITIAL_RULE_CAPACITY = 1000;
var
  i: integer;
  model: J48Split;
  la, ra: IArray;
  la2, ra2: IArray;
  ruleItem1: TDMRuleItem;
begin
  // Build the default item sets only when no rule prefix was supplied, so
  // recursive calls do not allocate objects that are never used.
  if (ruleItem = nil) then
  begin
    SetLength(la, m_train.numAttributes);
    SetLength(ra, m_train.numAttributes);
    SetLength(la2, m_train.numAttributes);
    SetLength(ra2, m_train.numAttributes);
    for i := 0 to m_train.numAttributes - 1 do
    begin
      la[i] := IMIN_VALUE;
      ra[i] := IMIN_VALUE;
      la2[i] := IMIN_VALUE;
      ra2[i] := IMIN_VALUE;
    end;
    ruleItem := TDMRuleItem.create(TDMItemSet.create(la, ra),
      TDMItemSet.create(la2, ra2));
  end;

  if (ruleList = nil) then
    ruleList := TList.create();
  // Only ever grow the capacity: assigning a capacity below Count raises
  // EListError, which the unconditional assignment hit on recursive calls
  // once more than 1000 rules had been collected.
  if (ruleList.Capacity < INITIAL_RULE_CAPACITY) then
    ruleList.Capacity := INITIAL_RULE_CAPACITY;

  model := m_localModel as J48Split;
  for i := 0 to m_sonsNumber - 1 do
  begin
    ruleItem1 := TDMRuleItem.Create(ruleItem.m_premise.CopyObject as TDMItemSet, ruleItem.m_consequence.CopyObject as TDMItemSet);
    ruleItem1 := model.ruleSeting(m_train, ruleItem1, i);
    if (m_sons[i].m_isLeaf) then
    begin
      ruleItem1 := model.dumpRule(i, m_train, ruleItem1);
      ruleList.add(TDMRuleItem.Create(ruleItem1.m_premise.CopyObject as TDMItemSet, ruleItem1.m_consequence.CopyObject as TDMItemSet));
    end
    else
    begin
      // NOTE(review): the extended rule replaces ruleItem, so later sons of
      // this node start from it rather than from the prefix passed in -
      // confirm this carry-over is intended.
      ruleItem := ruleItem1;
      ruleList := m_sons[i].dumpRules(ruleList, ruleItem);
    end;
  end;
  Result := ruleList;
end;

// Renders the branches of this node as text.
// For every son a new line is started, prefixed with depth copies of '| ',
// followed by the left and right side of the branch condition. A leaf son
// adds ': ' and its label; an inner son is rendered recursively one level
// deeper.
// @param depth nesting level of this node.
// @param text  the text collected so far.
// @return text followed by the rendering of this node's branches.
function J48Tree.dumpTree(depth: integer; text: String): String;
var
  sonIdx, level: integer;
  splitModel: J48Split;
  indent: String;
  acc: String;
begin
  splitModel := m_localModel as J48Split;

  // The prefix is the same for every branch of this node; build it once.
  indent := '';
  for level := 1 to depth do
    indent := indent + '| ';

  acc := text;
  for sonIdx := 0 to m_sonsNumber - 1 do
  begin
    acc := acc + #13#10 + indent;
    acc := acc + splitModel.leftSide(m_train);
    acc := acc + splitModel.rightSide(sonIdx, m_train);
    if (not m_sons[sonIdx].m_isLeaf) then
      acc := m_sons[sonIdx].dumpTree(depth + 1, acc)
    else
    begin
      acc := acc + ': ';
      acc := acc + splitModel.dumpLabel(sonIdx, m_train);
    end;
  end;
  Result := acc;
end;


// Textual rendering of the tree rooted at this node.
// @return ': ' plus the label of the local model for a leaf, otherwise the
//         output of dumpTree starting at depth 0 with an empty text.
function J48Tree.toString(): String;
begin
  if (not m_isLeaf) then
    Result := dumpTree(0, '')
  else
    Result := ': ' + m_localModel.dumpLabel(0, m_train);
  // Disabled in the original as well:
  // text:= text + #13#10+'Number of Leaves : '+IntToStr(numLeaves())+#13#10;
  // text:= text + #13#10+'Size of the tree : '+IntToStr(numNodes())+#13#10;
  //Debugger.Writeln('C:\Weka-3-4\data\log1.txt', text);
end;

// Builds the list of rules represented by this tree.
// A leaf yields a single rule produced by the local model's dumpRule from
// item sets filled with -1; an inner node delegates to dumpRules. All rules
// are then written to a debug log through Debugger.writeLn.
// @return a new TList whose items are TDMRuleItem objects.
// NOTE(review): the leaf case fills the item sets with -1 while dumpRules
// uses IMIN_VALUE - confirm which sentinel is intended.
function J48Tree.makeRules(): TList;
var
  ri: TDMRuleItem;
  ruleList: TList;
  la, ra: IArray;
  la2, ra2: IArray;
  left, right: TDMItemSet;
  i, j: integer;
  S: String;
begin
  ruleList := TList.Create();
  if (m_isLeaf) then
  begin
    SetLength(la, m_train.numAttributes);
    SetLength(ra, m_train.numAttributes);
    SetLength(la2, m_train.numAttributes);
    SetLength(ra2, m_train.numAttributes);
    for i := 0 to m_train.numAttributes - 1 do
    begin
      la[i] := -1;
      ra[i] := -1;
      la2[i] := -1;
      ra2[i] := -1;
    end;
    // Premise and consequence get their own arrays (as in dumpRules) so that
    // a change to one item set cannot show up in the other.
    left := TDMItemSet.create(la, ra);
    right := TDMItemSet.create(la2, ra2);
    ri := TDMRuleItem.create(left, right);
    ri := m_localModel.dumpRule(0, m_train, ri);
    // Store the rule object itself, exactly as dumpRules does; the loop below
    // and any caller can then treat every list item as a TDMRuleItem.
    ruleList.Add(ri);
  end
  else
    ruleList := dumpRules(ruleList, nil);

  // Debug dump of all rules.
  // NOTE(review): the log path is hard-coded and the first line break is
  // written as #10#13 - confirm both are intended.
  S := '';
  for j := 0 to ruleList.Count - 1 do
  begin
    ri := TDMRuleItem(ruleList.Items[j]);
    S := S + 'Rule'+IntToStr(j)+#10#13;
    S := S + 'left ';
    S := S +#13#10;
    for i := 0 to m_train.numAttributes - 1 do
      S := S + IntToStr(ri.m_premise.m_items[i])+' ';
    S := S +#13#10;
    for i := 0 to m_train.numAttributes - 1 do
      S := S + ' ' + IntToStr(ri.m_premise.m_condidtions[i])+' ';
    S := S +#13#10;
    S := S + 'right ';
    S := S +#13#10;
    for i := 0 to m_train.numAttributes - 1 do
      S := S + IntToStr(ri.m_consequence.m_items[i])+ ' ';
    S := S +#13#10;
    for i := 0 to m_train.numAttributes - 1 do
      S := S + ' ' + IntToStr(ri.m_consequence.m_condidtions[i])+' ';
    S := S +#13#10;
  end;
  Debugger.writeLn('d:\log.txt', S);

  Result := ruleList;
end;

// Counts the nodes of the subtree rooted at this node.
// @return 1 for a leaf, otherwise 1 plus the node counts of all sons.
function J48Tree.numNodes(): integer;
var
  no: integer;
  i: integer;
begin
  no := 1;
  if (not m_isLeaf) then
    // m_sons is zero-based with m_sonsNumber entries, so the last valid index
    // is m_sonsNumber - 1.
    for i := 0 to m_sonsNumber - 1 do
      no := no + m_sons[i].numNodes();
  Result := no;
end;



end.
Соседние файлы в папке j48