Скачиваний:
29
Добавлен:
01.05.2014
Размер:
14.41 Кб
Скачать
unit Apriori;

interface

uses
Classes,
FastVector,
Instances,
DmmTypes,
Tag,
AttributeStats,
SysUtils,
uContainers,
AprioriItemSet,
utils,
DoubleObject,
ItemSet,
associator,
dmm,
Windows;

type

TDMApriori = class (TDMAssociator)
protected
//минимальная поддержка
m_minSupport : double;

//верхняя граница минимальной поддержки
m_upperBoundMinSupport : double;

//нижняя граница минимальной поддержки
m_lowerBoundMinSupport : double;

//выбранный тип метрики
m_metricType : integer;

//минимальная величина метрики
m_minMetric : double;

//максимальное число правил
m_numRules : integer;

//величина на которую уменьшаеся значение m_minSupport на каждой итерации
m_delta : double;

m_cycles : integer;

// все наборы
m_Ls : TDMFastVector;

//все наборы в хэш таблице
m_hashtables : TDMFastVector ;

m_outputItemSets : boolean;

m_removeMissingCols : boolean ;

//выдавать сообщения о ходе выполнения
m_verbose : boolean;

// Массив из всех возможных правил
m_allTheRules : array of TDMFastVector;

m_instances : TDMInstances ;

public
constructor Create();
function globalInfo() : AnsiString;
procedure resetOptions();
procedure setInstances(instances : TDMInstances);
procedure setNumRules(numRules : integer);
procedure setLowerBoundMinSupport(lowerBoundMinSupport : double);
procedure setMinMetric(minMetric : double);
procedure buildModel(instances : TDMInstances); override;
procedure ClearModel(); override;
function toString() : AnsiString;
function toStrings() : TStrings; override;
destructor Destroy(); override;

private
//поиск частых наборов
procedure findLargeItemSets(instances : TDMInstances);
//поиск правил
procedure findRulesQuickly();
end;

implementation

constructor TDMApriori.Create();
begin
m_Ls := nil;
m_hashtables := nil;
m_instances:=nil;
m_allTheRules := nil;
resetOptions();
end;

function TDMApriori.globalInfo() : AnsiString;
begin
result := 'Поиск ассоциативных правил';
end;

procedure TDMApriori.resetOptions();
begin
m_removeMissingCols := false;
m_verbose := false;
m_delta := 0.05;
m_minMetric := 0.90;
m_numRules := 100;
m_lowerBoundMinSupport := 0.1;
m_upperBoundMinSupport := 1.0;
m_outputItemSets := false;
end;

procedure TDMApriori.setInstances(instances : TDMInstances);
begin
m_instances := TDMInstances.Create(instances);
end;

procedure TDMApriori.setNumRules(numRules : integer);
begin
m_numRules := numRules;
end;

procedure TDMApriori.setLowerBoundMinSupport(lowerBoundMinSupport : double);
begin
m_lowerBoundMinSupport := lowerBoundMinSupport;
end;

procedure TDMApriori.setMinMetric(minMetric : double);
begin
m_minMetric := minMetric;
end;

procedure TDMApriori.findLargeItemSets(instances : TDMInstances);
var
kMinusOneSets : TDMFastVector;
kSets: TDMFastVector;
aprioriHashtable : TStringHashtable;
necSupport, necMaxSupport,i : integer;
begin
necSupport := 0;
necMaxSupport := 0;
i := 0;
necSupport := trunc(m_minSupport * instances.numInstances()+0.5);
necMaxSupport := trunc(m_upperBoundMinSupport * instances.numInstances()+0.5);

kSets := AprioriItemSet.singletons(instances);
ItemSet.upDateCounters(kSets, instances);
kSets := ItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
if (kSets.Size()<> 0) then
repeat
begin
m_Ls.addElement(kSets);
kMinusOneSets := kSets;
kSets := AprioriItemSet.mergeAllItemSets(kMinusOneSets, i, instances.numInstances());
aprioriHashtable := ItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
m_hashtables.addElement(aprioriHashtable);
kSets := ItemSet.pruneItemSets(kSets, aprioriHashtable);
ItemSet.upDateCounters(kSets, instances);
kSets := ItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
inc(i);
end
until (kSets.size() <= 0);
FreeAndNil(kSets);
end;

procedure TDMApriori.findRulesQuickly();
var
rules : TDMFastVectorArray ;
j,k : integer;
currentItemSets : TDMFastVector;
enumItemSets : TDMFastVectorEnumeration;
currentItemSet : TDMAprioriItemSet;
begin
j := 1;
while (j < m_Ls.size()) do
begin
currentItemSets := m_Ls.elementAt(j) as TDMFastVector;
enumItemSets := TDMFastVectorEnumeration.Create(currentItemSets);
while (enumItemSets.hasMoreElements()) do
begin
currentItemSet := enumItemSets.nextElement() as TDMAprioriItemSet;
rules := currentItemSet.generateRules(m_minMetric, m_hashtables, j + 1);

k := 0;
while (k < rules[0].size()) do
begin
m_allTheRules[0].addElement(rules[0].elementAt(k));
m_allTheRules[1].addElement(rules[1].elementAt(k));
m_allTheRules[2].addElement(rules[2].elementAt(k));
inc(k);
end;
rules := nil;
end;
FreeandNil(enumItemSets);
inc(j);
end;
end;

procedure TDMApriori.buildModel(instances : TDMInstances);
var
confidences, supports : DArray;
indices : IArray;
necSupport : integer;
sortedRuleSet : TDMFastVectorArray;
i,j : integer;
sortType : integer;
curItemSets : TDMFastVector;
curItemSetsHashtables : TStringHashtable;
begin

setInstances(instances);

necSupport:=0;
SetLength(m_allTheRules,3);
SetLength(sortedRuleSet,3);

if (instances.checkForStringAttributes()) then
raise Exception.Create('Строковые атрибуты не поддерживаются');

// Decrease minimum support until desired number of rules found.
m_cycles := 0;
m_minSupport := m_upperBoundMinSupport - m_delta;

if m_minSupport < m_lowerBoundMinSupport then
m_minSupport := m_lowerBoundMinSupport;

// Reserve space for variables
m_Ls := TDMFastVector.Create();
m_hashtables := TDMFastVector.Create();
m_allTheRules[0] := TDMFastVector.Create();
m_allTheRules[1] := TDMFastVector.Create();
m_allTheRules[2] := TDMFastVector.Create();

sortedRuleSet[0] := TDMFastVector.Create();
sortedRuleSet[1] := TDMFastVector.Create();
sortedRuleSet[2] := TDMFastVector.Create();

repeat
begin
if (terminated) then
begin
ExitThread(1);
destroy;
end;
findLargeItemSets(instances);
findRulesQuickly();

SetLength(supports, m_allTheRules[2].size());

i := 0;
while (i < m_allTheRules[2].size()) do
begin
supports[i] := (m_allTheRules[1].elementAt(i) as TDMAprioriItemSet).support();
inc(i);
end;
indices := Utils.stableSort(supports);

i := 0;
while (i < m_allTheRules[2].size()) do
begin
sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i]));
sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i]));
sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i]));
inc(i);
end;

// сортировка по достоверности
m_allTheRules[0].removeAllElements();
m_allTheRules[1].removeAllElements();
m_allTheRules[2].removeAllElements();

SetLength(confidences, sortedRuleSet[2].size());
sortType := 2 + m_metricType;

i := 0;
while (i < sortedRuleSet[2].size()) do
begin
confidences[i] := (sortedRuleSet[sortType].elementAt(i) as TDMDoubleObject).doubleValue();
inc(i);
end;
indices := Utils.stableSort(confidences);

i := sortedRuleSet[0].size() - 1;
while ( (i >= (sortedRuleSet[0].size() - m_numRules)) and (i >= 0)) do
begin
m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
dec(i);
end;

m_minSupport := m_minSupport - m_delta;

necSupport := trunc(m_minSupport * instances.numInstances()+0.5);
inc(m_cycles);

SetLength(confidences, 0);
SetLength(supports, 0);
SetLength(indices, 0);

if((m_allTheRules[0].size() < m_numRules)
and (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport))
and (necSupport >= 1)) then
begin
//удаление m_Ls
j := 0;
while (j < m_Ls.size()) do
begin
curItemSets := m_Ls.elementAt(j) as TDMFastVector;
curItemSets.removeAllElements();
inc(j);
end;
m_Ls.removeAllElements();

//удаление m_hashtables
j := 0;
while (j < m_hashtables.size()) do
begin
curItemSetsHashtables := m_hashtables.elementAt(j) as TStringHashtable;
curItemSetsHashtables.OwnValues := true;
FreeAndNil(curItemSetsHashtables);
inc(j);
end;
m_hashtables.removeAllElements();

//удаление правил
sortedRuleSet[0].removeAllElements();
sortedRuleSet[1].removeAllElements();
sortedRuleSet[2].removeAllElements();

m_allTheRules[0].removeAndClearAllElements();
m_allTheRules[1].removeAndClearAllElements();
m_allTheRules[2].removeAndClearAllElements();
end;
end
until not ((m_allTheRules[0].size() < m_numRules)
and (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport))
and (necSupport >= 1));

sortedRuleSet[0].removeAllElements();
sortedRuleSet[1].removeAllElements();
sortedRuleSet[2].removeAllElements();

FreeandNil( sortedRuleSet[0]);
FreeandNil( sortedRuleSet[1]);
FreeandNil( sortedRuleSet[2]);
SetLength(sortedRuleSet, 0);

m_minSupport := m_minSupport + m_delta;
end;

procedure TDMApriori.ClearModel();
var
j : integer;
curItemSets : TDMFastVector;
curItemSetsHashtables : TStringHashtable;
begin
if(m_Ls <> nil) then
begin
j := 0;
while (j < m_Ls.size()) do
begin
curItemSets := m_Ls.elementAt(j) as TDMFastVector;
curItemSets.removeAllElements();
inc(j);
end;
m_Ls.removeAllElements();
m_Ls := nil;
end;

if(m_hashtables <> nil) then
begin
j := 0;
while (j < m_hashtables.size()) do
begin
curItemSetsHashtables := m_hashtables.elementAt(j) as TStringHashtable;
curItemSetsHashtables.OwnValues := true;
FreeAndNil(curItemSetsHashtables);
inc(j);
end;
m_hashtables.removeAllElements();
m_hashtables := nil;
end;
end;

function TDMApriori.toString() : AnsiString;
var
text : AnsiString;
temp, temp1 : AnsiString;
i,j : integer;
val : double;
begin
text :='';
if (m_Ls.size() <= 1) then
begin
result := 'Не облнаружено наборов и правил';
exit;
end;
text := text + 'Apriori' +#10#13;
Str(m_minSupport:4:2,temp);
text := text + 'Минимальная поддержка: ' + temp + #10#13;
text := text + 'Минимальное значение метрики ';

text := text + 'достоверность: ';
Str(m_minMetric:4:2,temp);
text := text + temp +#10#13;
text := text + 'Найденные правила:' + #10#13;

i := 0;
while (i < m_allTheRules[0].size()) do
begin
val := i+1;
Str(val:round(ln(m_numRules)/ln(10)+1):2,temp);
Str((m_allTheRules[2].elementAt(i) as TDMDoubleObject).doubleValue:4:2,temp1);

text := text + temp + '. ' + (m_allTheRules[0].elementAt(i) as TDMAprioriItemSet).
toString(m_instances)
+ ' ==> ' + (m_allTheRules[1].elementAt(i) as TDMAprioriItemSet).
toString(m_instances) +' достоверность:('+ temp1 +')';

text := text + #10#13;
inc(i);
end;
result := text;
end;

function TDMApriori.toStrings() : TStrings;
var
text : TStrings;
strText : AnsiString;
temp, temp1 : AnsiString;
i,j : integer;
val : double;
begin
text := TStringList.Create;

text.Clear;
if (m_Ls.size() <= 1) then
begin
text.Add('Не облнаружено наборов и правил');
result := text;
exit;
end;
text.Add('Apriori');
text.Add('Данные: ' + inst.relationName);
Str(m_minSupport:4:2,temp);
text.Add('Минимальная поддержка: ' + temp);

strText:='';
strText := strText + 'Минимальное значение метрики ';
strText := strText + 'достоверность: ';
Str(m_minMetric:4:2,temp);

text.Add(strText + temp);

text.Add('Найденные правила:');

i := 0;
while (i < m_allTheRules[0].size()) do
begin
val := i+1;
Str(val:round(ln(m_numRules)/ln(10)+1):0,temp);
Str((m_allTheRules[2].elementAt(i) as TDMDoubleObject).doubleValue:4:2,temp1);

strText:='';
strText := strtext + temp + '. '
+ (m_allTheRules[0].elementAt(i) as TDMAprioriItemSet).
toString(m_instances)
+ ' ==> ' + (m_allTheRules[1].elementAt(i) as TDMAprioriItemSet).
toString(m_instances)
+' достоверность:('+ temp1 +')';
text.Add(strtext);
inc(i);
end;
result := text;
end;

destructor TDMApriori.Destroy();
var
i : integer;
begin

inherited destroy;

ClearModel();
FreeAndNil(m_instances);

//удаление правил
if(length(m_allTheRules)<>0)then
begin
m_allTheRules[0].removeAndClearAllElements();
m_allTheRules[1].removeAndClearAllElements();
m_allTheRules[2].removeAndClearAllElements();

FreeandNil(m_allTheRules[0]);
FreeandNil(m_allTheRules[1]);
FreeandNil(m_allTheRules[2]);

setlength(m_allTheRules,0);
end;

end;

end.
Соседние файлы в папке DMAssociations