Скачиваний:
28
Добавлен:
01.05.2014
Размер:
17.22 Кб
Скачать
unit RuleGeneration;

interface
uses
Instances,
dmmTypes,
dmmConstants,
uContainers,
ItemSet,
FastVector,
RuleItem,
DoubleObject,
math,
PriorEstimation,
Statistics,
SysUtils,
uConsts;

const
MAX_N = 300;

type
TDMRuleGeneration = class (TObject)
public
//набор
m_items : IArray;

//число векторов содержащих данный набора
m_counter : integer;

//общее количество векторов
m_totalTransactions : integer;

//изменился ли список лучших правил
m_change : boolean;

//минимальное тебуемое значение точности набора для попадания в список лучших
m_expectation : double;

//минимальная требуемая поддержка набора для попадания в список лучших
m_minRuleCount : integer;

m_midPoints : DArray;

m_priors : TStringHashtable;

//список лучших правил
m_best : TArrayList;

m_count : integer;

m_instances : TDMInstances ;

m_OwnValues : boolean;

public
constructor Create(itemSet : TDMItemSet);
property OwnValues: Boolean read m_OwnValues write m_OwnValues;
function generateRules(numRules : integer; midPoints : DArray; priors : TStringHashtable; curExpectation : double; instances : TDMInstances; var best : TArrayList; genTime : integer) : TArrayList;
procedure singleConsequence(instances : TDMInstances; attNum : integer; var consequences : TDMFastVector );
function removeRedundantRuleItem(toInsert : TDMRuleItem) : boolean;
function aSubsumesB( a : TDMRuleItem; b : TDMRuleItem ) : boolean ;

function compareRuleItem (Item: Pointer): Integer;
destructor Destroy; override;

end;
//ожидаемая точность предсказанияя
function expectation(ruleCount : double; premiseCount : integer; midPoints : DArray; priors : TStringHashtable ) : double;
function binomialDistribution(accuracy : double; ruleCount : double; premiseCount : double) : double;
function generateRuleItem(premise : TDMItemSet; consequence : TDMItemSet; instances : TDMInstances ; genTime : integer; minRuleCount : integer; m_midPoints : DArray; m_priors : TStringHashtable): TDMRuleItem;
procedure SetRuleItemAccuracy (var ruleItem : TDMRuleItem; ruleSupport : integer; premise : TDMItemSet;m_midPoints : DArray; m_priors : TStringHashtable);
function equalRuleItem (Value, Item: Pointer): Integer;
function tocompareRuleItem (Value,Item: Pointer): Integer;

implementation
constructor TDMRuleGeneration.Create(itemSet : TDMItemSet);
begin
m_totalTransactions := itemSet.m_totalTransactions;
m_counter := itemSet.m_counter;
m_items := itemSet.m_items;
m_change := false;
m_midPoints :=nil;
m_priors := nil;
m_best := nil;
m_instances := nil;
m_OwnValues := false;
end;

procedure SetRuleItemAccuracy (var ruleItem : TDMRuleItem; ruleSupport : integer; premise : TDMItemSet;m_midPoints : DArray; m_priors : TStringHashtable);
begin
ruleItem.m_accuracy := RuleGeneration.expectation(ruleSupport,premise.m_counter,m_midPoints,m_priors);
if(isNaN(ruleItem.m_accuracy)) or (ruleItem.m_accuracy < 0) then
begin
ruleItem.m_accuracy := MIN_VALUE;
end;
end;

function TDMRuleGeneration.aSubsumesB(a : TDMRuleItem; b : TDMRuleItem ) : boolean ;
var
k : integer;
begin
if(a.m_accuracy < b.m_accuracy) then
begin
result := false;
exit;
end;

k := 0;
while (k < length(a.premise().m_items)) do
begin
if(a.premise().m_items[k] <> b.premise().m_items[k]) then
begin
if( ( ((a.premise().m_items[k]) <> -1) and (b.premise().m_items[k] <> -1)) or (b.premise().m_items[k] = -1)) then
begin
result := false;
exit;
end;
end;
if(a.consequence().m_items[k] <> b.consequence().m_items[k]) then
begin
if(((a.consequence().m_items[k] <> -1) and (b.consequence().m_items[k] <> -1)) or (a.consequence().m_items[k] = -1)) then
begin
result := false;
exit;
end;
end;
inc(k);
end;
result := true;

end;

function generateRuleItem(premise : TDMItemSet; consequence : TDMItemSet; instances : TDMInstances ; genTime : integer; minRuleCount : integer; m_midPoints : DArray; m_priors : TStringHashtable): TDMRuleItem;
var
rule : TDMItemSet;
i,k : integer;
ruleSupport : integer;
newRule : TDMRuleItem;
newpremise : TDMItemSet;
begin
rule := TDMItemSet.Create(instances.numInstances());
SetLength(rule.m_items, length(consequence.m_items));;
rule.m_items := Copy(premise.m_items, 0, length(premise.m_items));

newpremise := TDMItemSet.Create;
newpremise.m_totalTransactions := premise.m_totalTransactions;
newpremise.m_id := premise.m_id;
newpremise.m_items := premise.m_items;
newpremise.m_counter := premise.m_counter;

k := 0;
while (k < length(consequence.m_items))do
begin
if(consequence.m_items[k] <> -1) then
rule.m_items[k] := consequence.m_items[k];
inc(k);
end;

i := 0;
while (i < instances.numInstances()) do
begin
rule.upDateCounter(instances.instance(i));
inc(i);
end;

ruleSupport := rule.support();
if(ruleSupport > minRuleCount) then
begin
newRule := TDMRuleItem.Create(newpremise,consequence,genTime,ruleSupport);
SetRuleItemAccuracy(newRule, ruleSupport,newpremise,m_midPoints,m_priors);
FreeAndNil(rule);
result := newRule;
exit;
end;
FreeAndNil(rule);
FreeAndNil(newpremise);
result := nil;
end;

function binomialDistribution(accuracy : double; ruleCount : double; premiseCount : double) : double;
var
mu, sigma : double;
begin
if(premiseCount < MAX_N) then
begin
result := power(2,(log2(power(accuracy,ruleCount))+
log2(power((1.0-accuracy),(premiseCount-ruleCount)))+
PriorEstimation.logbinomialCoefficient(trunc(premiseCount),trunc(ruleCount))));
end
else
begin
mu := premiseCount * accuracy;
sigma := sqrt((premiseCount * (1.0 - accuracy))*accuracy);
result := Statistics.normalProbability(((ruleCount+0.5)-mu)/(sigma*sqrt(2)));
end;
end;

function expectation(ruleCount : double; premiseCount : integer; midPoints : DArray; priors : TStringHashtable ) : double;
var
numerator,denominator : double;
i : integer;
actualPrior : double;
midPointValue : AnsiString;
curTDMDoubleObject : TDMDoubleObject;
addend : double;
help : AnsiString;
begin
numerator := 0;
denominator := 0;

i := 0;
while (i < length(midPoints)) do
begin
Str(midPoints[i], midPointValue);
curTDMDoubleObject := priors.Items[midPointValue];
if(curTDMDoubleObject <> nil) then
begin
actualPrior := curTDMDoubleObject.doubleValue;
if(actualPrior <> 0) then
begin
addend := actualPrior * binomialDistribution(midPoints[i], ruleCount, premiseCount);
denominator := denominator + addend;
numerator := numerator + addend*midPoints[i];
end;
end;
inc(i);
end;
if(denominator <= 0) or (isNaN(denominator)) then
begin
Str(denominator:4:2, help);
raise Exception.Create('Значение denominator: '+ help);
end;
if(numerator <= 0) or (isNaN(numerator)) then
begin
Str(numerator:4:2, help);
raise Exception.Create( 'Значение numerator: '+ help);
end;
result := numerator/denominator;
end;

procedure TDMRuleGeneration.singleConsequence(instances : TDMInstances; attNum : integer; var consequences : TDMFastVector );
var
consequence : TDMItemSet;
i,j,k : integer;
begin
i := 0;
while (i < instances.numAttributes()) do
begin
if( i = attNum) then
begin
j := 0;
while ( j < instances.attribute(i).numValues()) do
begin
consequence := TDMItemSet.Create(instances.numInstances());
SetLength(consequence.m_items,instances.numAttributes());
k := 0;
while ( k < instances.numAttributes()) do
begin
consequence.m_items[k] := -1;
inc(k);
end;
consequence.m_items[i] := j;
consequences.addElement(consequence);
inc(j);
end;
end;
inc(i);
end;

end;

function equalRuleItem (Value, Item: Pointer): Integer;
var
itemSet : TDMRuleItem;
itemSetToCompare : TDMRuleItem;
PDMRuleItem : ^TDMRuleItem;
begin
PDMRuleItem := Value;
itemSet := PDMRuleItem^ ;
PDMRuleItem := Item;
itemSetToCompare := PDMRuleItem^;
if (itemSet.equals(itemSetToCompare)) then
result := 0
else
result := -1;
end;


function TDMRuleGeneration.removeRedundantRuleItem(toInsert : TDMRuleItem) : boolean;
var
redundant, fSubsumesT, tSubsumesF : boolean;
first : TDMRuleItem;
subsumes : integer;
i : integer;
plist : PPointerItemList;
list : TPointerItemList;
remove : boolean;
index : integer;
best : TArrayList;

begin

redundant := false;
fSubsumesT := false;
tSubsumesF := false;

subsumes := 0;
plist := m_best.ItemList;
i:=0;
while (i < m_best.Count) do
begin
first := plist^[i];
fSubsumesT := aSubsumesB(first,toInsert);
tSubsumesF := aSubsumesB(toInsert, first);
if(fSubsumesT) then
begin
subsumes := 1;
break;
end
else
begin
if(tSubsumesF) then
begin
m_best.OwnItems := true;
first.m_premise.OwnValues := false;
first.m_OwnValues := true;
m_best.removeAt(i);
m_best.OwnItems :=false;
dec(i);
subsumes := 2;
redundant := true;
end;
end;
inc(i);
end;
if(subsumes = 0) or (subsumes = 2) then
begin
if (compareRuleItem(toInsert)<> 0) then
begin
m_best.add(toInsert);
m_best.Sort(toCompareRuleItem);
end;
end;
result := redundant;
end;

function TDMRuleGeneration.generateRules(numRules : integer; midPoints : DArray;
priors : TStringHashtable; curExpectation : double;
instances : TDMInstances; var best : TArrayList;
genTime : integer) : TArrayList;
var
redundant : boolean;
consequences,consequencesMinusOne : TDMFastVector;
premise : TDMItemSet;
s : integer;
current, old : TDMRuleItem;
hashtable : TStringHashtable ;
i, j, h,k : integer;
allRuleItems : TDMFastVector;
index : integer;
dummie : TDMRuleItem;
remove : boolean;
firstBest : TDMRuleItem;
plist : PPointerItemList;
itemToRemove : TDMItemSet;
reserved : boolean;

ii ,hh: integer;
rlSet:TDMRuleItem;

itSetc:TDMItemSet;
itsP :IArray;
itsC :IArray;
rF: TextFile;
begin

redundant := false;

consequences := TDMFastVector.Create();
consequencesMinusOne := nil;

reserved :=false;
s := 0;
current := nil;

m_change := false;
m_midPoints := midPoints;
m_priors := priors;
m_best := best;
m_expectation := curExpectation;
m_count := genTime;
m_instances := instances;

premise := nil;
premise := TDMItemSet.Create(m_totalTransactions);
premise.m_items := m_items;
premise.m_counter := m_counter;

repeat
begin
m_minRuleCount := 1;
while (expectation(m_minRuleCount, premise.m_counter, m_midPoints, m_priors) <= m_expectation) do
begin
inc(m_minRuleCount);
if(m_minRuleCount > premise.m_counter) then
begin
consequences.elementMemoryManagement(false);
FreeAndNil(consequences);
best:=nil;
result := m_best;
exit;
end;
end;
redundant := false;

i := 0;
while (i < instances.numAttributes()) do
begin
if(i = 0) then
begin
j := 0;
while(j < length(m_items)) do
begin
if(m_items[j] = -1) then
singleConsequence(instances, j,consequences);
inc(j);
end;
if(premise = nil) or (consequences.size() = 0) then
begin
result := m_best;
consequences.elementMemoryManagement(false);
FreeAndNil(consequences);
best:=nil;
exit;
end;
end;
allRuleItems := TDMFastVector.Create();
index := 0;
repeat
begin
h := 0;
while(h < consequences.size()) do
begin
current := generateRuleItem(premise,consequences.elementAt(h) as TDMItemSet,instances,m_count,m_minRuleCount,m_midPoints,m_priors);
if(current <> nil)then
begin
allRuleItems.addElement(current);
inc(h);
end else
consequences.removeAndClearElementAt(h);
end;

if(index = i) then
begin
consequences := consequences.copyObject() as TDMFastVector;
break;
end;

consequencesMinusOne := consequences;
consequences := ItemSet.mergeAllItemSets(consequencesMinusOne, index, instances.numInstances());
hashtable := ItemSet.getHashtable(consequencesMinusOne, consequencesMinusOne.size());
consequences := ItemSet.pruneItemSets(consequences, hashtable);
FreeAndNil(hashtable);

inc(index);
end;
until not (consequences.size() > 0);

h := 0;
while (h < allRuleItems.size())do
begin
current := allRuleItems.elementAt(h) as TDMRuleItem;
inc(m_count);
if(m_best.Count < numRules) then
begin
m_change := true;
redundant:=removeRedundantRuleItem(current);
end
else
begin
if(current.accuracy() > m_expectation) then
begin
plist := m_best.ItemList;
firstBest := plist^[0];
m_expectation := firstBest.accuracy();
m_best.OwnItems := true;

firstBest.m_OwnValues := true;
m_best.removeAt(0);
m_best.OwnItems := false;
m_change := true;
redundant:=removeRedundantRuleItem(current);

plist := m_best.ItemList;
firstBest := plist^[0];
m_expectation := firstBest.accuracy();
while(expectation(m_minRuleCount,(current.premise()).m_counter,m_midPoints,m_priors) < m_expectation)do
begin
inc(m_minRuleCount);
if(m_minRuleCount > (current.premise()).m_counter) then
break;
end;
end
else
begin

current.m_OwnValues := true;
FreeAndNil(current);
end;
end ;
inc(h);
end;
inc(i);
allRuleItems.elementMemoryManagement(false);
FreeAndNil(allRuleItems);

end;
end;
until (not redundant);

FreeAndNil(premise);
consequences.elementMemoryManagement(false);
FreeAndNil(consequences);
best:=nil;
result := m_best;
end;


function TDMRuleGeneration.compareRuleItem (Item: Pointer): Integer;
var
i : integer;
plist : PPointerItemList;


itemSet : TDMRuleItem;
itemSetToCompare : TDMRuleItem;
PTDMRuleItem : ^TDMRuleItem;
curResult : integer;
begin

i := 0;
plist := m_best.ItemList;

result := -1;

while (i < m_best.Count) do
begin
itemSet := plist^[i];

result := itemSet.compareTo(item);
if (result = 0) then
exit;
inc(i);
end;


end;

function tocompareRuleItem (Value,Item: Pointer): Integer;
var

itemSet : TDMRuleItem;
itemSetToCompare : TDMRuleItem;
PTDMRuleItem : ^TDMRuleItem;
begin

PTDMRuleItem := Item;
itemSetToCompare := PTDMRuleItem^;


itemSet := value;

result := (itemSet as TDMRuleItem).compareTo(item);

end;

destructor TDMRuleGeneration.Destroy;
begin
if(m_OwnValues)then
begin
FreeAndNil(m_items);
m_midPoints :=nil;
end;
m_priors := nil;
m_best := nil;
m_instances:=nil;
end;
end.
Соседние файлы в папке DMAssociations