{ Download-page residue, not part of the unit source:
  downloads: 28; added: 01.05.2014; size: 68.33 KB; "Download" link. }
unit Instances;

interface

uses
Attribute,
AttributeStats,
Classes,
Dialogs,
DmmConstants,
DmmTypes,
Exceptions,
FastVector,
Math,
SerializableObject,
StandardSerializableObject,
StreamTokenizer,
SysUtils,
Randomizer,
Utils;

{ Returns the Double used as the missing-value marker (attributeStats seeds its
  "previous value" with it). The implementation is not visible in this section. }
function missingValue : Double;
{ Tests a value against the missing-value marker -- presumably the one returned
  by missingValue; TODO confirm against the implementation. }
function isMissingValue( const value : Double ) : Boolean;

type
{ Forward declaration: the vector class (declared in full below) }
TDMInstance = class;

{ Forward declaration: iterator class for walking objects of type TDMAttribute }
TDMAttributeEnumeration = class;

{ Forward declaration: iterator class for walking objects of type TDMInstance }
TDMInstanceEnumeration = class;


{ Class that holds a data set: a relation name, a vector of attributes,
  a vector of instances and the index of the class attribute. }
TDMInstances = class( TDMSerializableObject )
protected
{ Attribute descriptions (elements are read back as TDMAttribute) }
m_Attributes : TDMFastVector;
{ Index of the class attribute; the constructors use -1 for "not set" }
m_ClassIndex : Integer;
{ The data rows (elements are read back as TDMInstance) }
m_Instances : TDMFastVector;
{ Name of the relation }
m_RelationName : AnsiString;

public
constructor Create( relationName : AnsiString; attInfo : TDMFastVector; const capacity : Integer ); overload;
constructor Create( dataset : TDMInstances ); overload;
constructor Create( dataset : TDMInstances; const capacity : Integer ); overload;
constructor Create( source : TDMInstances; const first, toCopy : Integer ); overload;
constructor Create (reader : TStream); overload;
function CloneAttributes():TDMInstances;
function CloneInstances() : TDMInstances;
destructor Destroy; override;
procedure RemoveData;

function Cr_Att(dataset : TDMInstances): TDMInstances;
procedure setClassIndex_pr(dataset : TDMInstances);
procedure setRelationName_pr(dataset : TDMInstances);
procedure setAttributes_pr(dataset : TDMInstances);

function classAttribute : TDMAttribute;
function classIndex : Integer;
function numClasses : Integer;
function numInstances : Integer;
function relationName : AnsiString;
{ Creates the training set for one fold of a cross-validation on the
  data set. The data is randomized.
  numFolds - number of folds in the cross-validation, must be > 1
  numFold - 0 for the first fold, 1 for the second, and so on
  Returns the training set. }
function trainCV(numFolds : Integer; numFold : Integer; r : Integer{TDMRnd}) : TDMInstances;
function testCV(numFolds : Integer; numFold : Integer) : TDMInstances;
procedure swap(i, j : Integer);

procedure add( instance : TDMInstance ; flagCopy : boolean = true);
function instance( const index : Integer ) : TDMInstance;
function firstInstance : TDMInstance;
function lastInstance : TDMInstance;

function checkInstance( instance : TDMInstance ) : Boolean;

function attribute( const index : Integer ) : TDMAttribute; overload;
function attribute( const name : AnsiString ) : TDMAttribute; overload;
function attributes() : TDMFastVector;

function enumerateAttributes : TDMAttributeEnumeration;
function enumerateInstances : TDMInstanceEnumeration;

procedure insertAttributeAt( attribute : TDMAttribute; const position : Integer );
function checkForStringAttributes : Boolean;
function numAttributes : Integer;

function attributeStats( const index : Integer ) : TDMAttributeStats;
function attributeToDoubleArray( const index : Integer ) : TDMInstanceValues;
function sumOfWeights : Double;

procedure copyInstances( const from : Integer; dest : TDMInstances; const num : Integer );
procedure delete;

function equalHeaders( dataset : TDMInstances ) : Boolean;

procedure serializeObject( ostream : TStream ); override;
procedure deserializeObject( istream : TStream ); override;

function toString : AnsiString;
function toStringI : AnsiString;
function toStringII(num:I2IArray;k:Integer) : AnsiString;
function toStringTT : TStringList;

{ Returns the mean for a numeric attribute as a floating-point number.
  Returns 0 if the attribute is neither numeric nor nominal, or if all
  values are missing.
  attIndex - index of the attribute
  NOTE(review): the visible start of the implementation returns -Nan, not 0,
  when every value of the attribute is missing. }
function meanOrMode(attIndex:integer):double;

procedure deleteWithMissing(attIndex : integer); overload;
procedure deleteWithMissing(att : TDMAttribute); overload;
procedure deleteWithMissingClass();
// number of missing values for the attribute
function attrMissingValues(attrIndex : integer):integer;
// number of unique values of the attribute
function numUniqueValues(attIndex : integer) :integer;
procedure setClassIndex(classIndex : integer);
procedure sort(att : TDMAttribute); overload;
procedure sort(attIndex : integer); overload;
procedure compactify();
function variance(attIndex:integer):double;
// number of values of the attribute
function numDistinctValues(attIndex : integer) :integer;
// number of vectors that contain the given value of a qualitative attribute
function instForAttrValue(attrIndex : integer; valueIndex: integer) : integer;
function instFor2AttrValue(attrIndex : integer; valueIndex: integer;
dattrIndex : integer; dvalueIndex: integer ) : integer;
// minimum value of a quantitative attribute
function minNumAttrValue(attIndex : integer): double;
// maximum value of a quantitative attribute
function maxNumAttrValue(attIndex : integer): double;
// standard deviation
function stdDeviation(attIndex : integer): double;
function sortInstancesByNomAttr(attIndex : integer;attrValue : AnsiString) : IArray;
function instForAttrValueInInterval(arr : DArray; attrIndex : integer; value :AnsiString; step : double; intNumber : integer):integer; overload;
function instForAttrValueInInterval(arr : DArray; attrIndexX : integer; attrIndexY :integer; step : double; intNumber : integer):integer;overload;
procedure deleteAttributeAt(position : integer);
procedure replaceWithMissing(attIndex : integer;
value : double);
function attrCopyStructure() : TDMInstances;
{ Moves the instances within the set so that they end up in random order }
procedure randomiz(r : Integer{TDMRnd});
function getRandomNumberGenerator( seed : integer) : integer;

procedure setRelationName(newName : AnsiString);

protected
{ Sorting helper and the tokenizer-based reader used by Create(reader) }
procedure quickSort(attIndex : integer; lo0 : integer; hi0 : integer);
procedure initTokenizer(var tokenizer : TDMStreamTokenizer);
procedure readHeader(tokenizer : TDMStreamTokenizer );
procedure getFirstToken(var tokenizer : TDMStreamTokenizer);
procedure errms(tokenizer : TDMStreamTokenizer; theMsg : AnsiString);
procedure getNextToken(var tokenizer : TDMStreamTokenizer);
procedure getLastToken(tokenizer : TDMStreamTokenizer; endOfFileOk : boolean );
procedure readTillEOL(tokenizer : TDMStreamTokenizer);
function getInstance(tokenizer : TDMStreamTokenizer; flag : boolean ) : boolean;
function getInstanceFull(tokenizer : TDMStreamTokenizer; flag : boolean) : boolean;
end;

{ One element of a TDMInstances data set. It carries an array of attribute
  values, a weight and a reference to a data set. The method bodies are not
  visible in this section, so the notes below describe the declarations only. }
TDMInstance = class( TDMStandardSerializableObject )
protected
{ Attribute values of this instance }
m_AttValues : TDMInstanceValues;
m_Size : Integer;
{ Data set this instance refers to (set through setDataset) }
m_Dataset : TDMInstances;
m_Weight : Double;

public
constructor Create( instance : TDMInstance ); overload;
constructor Create( const numAttributes : Integer ); overload;
constructor Create( const weight : Double; attValues : TDMInstanceValues; const size : Integer ); overload;
constructor Create( weight : double; attValues : TDMInstanceValues); overload;
constructor Create( weight : double; attValues : TDMInstanceValues;
dataset : TDMInstances); overload;

function attribute( const index : Integer ) : TDMAttribute;
function numAttributes : Integer;
function weight : Double;
function size : integer;
procedure setWeight( const weight : Double );

function classAttribute : TDMAttribute;
function classIndex : Integer;

procedure deleteAttributeAt( const position : Integer );
procedure insertAttributeAt( const position : Integer );
function mergeInstance( instance : TDMInstance ) : TDMInstance;

function enumerateAttributes : TDMAttributeEnumeration;

function classIsMissing : Boolean;
procedure setClassMissing;
function classValue : Double;
procedure setClassValue( const value : Double );
function numClasses : Integer;

function hasMissingValue : Boolean;

function dataset : TDMInstances;
procedure setDataset( dataset : TDMInstances );

function equalHeaders( otherInstance : TDMInstance ) : Boolean;

function index( const position : Integer ) : Integer;

{ Value accessors, addressed either by attribute index or by attribute object }
function value( const index : Integer ) : Double; overload;
function value( attribute : TDMAttribute ) : Double; overload;
function stringValue( const index : Integer ) : AnsiString; overload;
function stringValue( attribute : TDMAttribute ) : AnsiString; overload;

procedure setValue( const index : Integer; const value : Double ); overload;
procedure setValue( attribute : TDMAttribute; const value : Double ); overload;
procedure setValue( const index : Integer; const value : AnsiString ); overload;
procedure setValue( attribute : TDMAttribute; const value : AnsiString ); overload;

function isMissing( const index : Integer ) : Boolean; overload;
function isMissing( attribute : TDMAttribute ) : Boolean; overload;

procedure setMissing( const index : Integer ); overload;
procedure setMissing( attribute : TDMAttribute ); overload;

procedure replaceMissingValues( replacement : TDMInstanceValues );

function cloneObject : TObject; override;
function copyObject : TObject; override;
function toString : AnsiString; overload; override;
function toString( ignoreAttr:BoolArray) : AnsiString; overload;
procedure serializeObject( ostream : TStream ); override;
procedure deserializeObject( istream : TStream ); override;
procedure forceDeleteAttributeAt(position : integer);
destructor Destroy; override;
end;

{ Iterator over TDMAttribute objects, built from a TDMInstances data set.
  nextElement is reintroduced so that it returns TDMAttribute. }
TDMAttributeEnumeration = class( TDMFastVectorEnumeration )
public

constructor Create( dataset : TDMInstances );

function hasMoreElements : Boolean; override;
function nextElement : TDMAttribute; reintroduce;
end;

{ Iterator over TDMInstance objects, built from a TDMInstances data set.
  nextElement is reintroduced so that it returns TDMInstance. }
TDMInstanceEnumeration = class( TDMFastVectorEnumeration )
public

constructor Create( dataset : TDMInstances );

function hasMoreElements : Boolean; override;
function nextElement : TDMInstance; reintroduce;
end;



implementation

{ Builds an empty data set.
  relationName - name stored as the relation name
  attInfo      - attribute vector; a copy (copyObject) is stored and every
                 copied attribute is renumbered to its position in the vector
  capacity     - passed to the constructor of the instance vector
  The class index starts out as -1. }
constructor TDMInstances.Create( relationName : AnsiString; attInfo : TDMFastVector; const capacity : Integer );
var
  pos : Integer;
begin
  m_ClassIndex := -1;
  m_RelationName := relationName;
  m_Attributes := attInfo.copyObject as TDMFastVector;
  for pos := 0 to m_Attributes.size - 1 do
    attribute( pos ).setIndex( pos );
  m_Instances := TDMFastVector.Create( capacity );
end;

{ Returns a new, empty data set with this set's relation name and a class
  index of -1. The attribute vector is NOT copied: the result references the
  same m_Attributes object as Self, and the attributes are renumbered in place.
  The new instance vector is created with numInstances as its constructor
  argument.
  NOTE(review): because the vector is shared, destroying both data sets would
  free it twice unless RemoveData is called on one of them -- confirm callers. }
function TDMInstances.CloneAttributes():TDMInstances;
var
  pos : Integer;
begin
  Result := TDMInstances.Create();
  Result.m_RelationName := m_relationName;
  Result.m_ClassIndex := -1;
  Result.m_Attributes := m_Attributes;
  for pos := 0 to Result.m_Attributes.size - 1 do
    Result.attribute( pos ).setIndex( pos );
  Result.m_Instances := TDMFastVector.Create( numInstances );
end;

{ Builds an empty data set with the header of another one.
  dataset  - source of the relation name, class index and attributes
             (the attribute vector is copied with copyObject)
  capacity - passed to the constructor of the instance vector }
constructor TDMInstances.Create( dataset : TDMInstances; const capacity : Integer );
begin
  m_RelationName := dataset.m_RelationName;
  m_ClassIndex := dataset.m_ClassIndex;
  m_Instances := TDMFastVector.Create( capacity );
  m_Attributes := dataset.m_Attributes.copyObject as TDMFastVector;
end;

{ Copy constructor: takes the class index, relation name and a copy of the
  attribute vector from dataset, then adds all of its instances through
  copyInstances. }
constructor TDMInstances.Create( dataset : TDMInstances );
var
  total : Integer;
begin
  m_ClassIndex := dataset.m_ClassIndex;
  m_RelationName := dataset.m_RelationName;
  m_Attributes := dataset.m_Attributes.copyObject as TDMFastVector;
  total := dataset.numInstances;
  m_Instances := TDMFastVector.Create( total );
  dataset.copyInstances( 0, Self, total );
end;

{ Returns a new data set that shares this set's attribute vector and whose
  instance vector holds the SAME instance objects as this set: the elements
  are added by reference, not copied. Class index and relation name are
  taken over as well. }
function TDMInstances.CloneInstances() : TDMInstances;
var
  row : Integer;
begin
  Result := TDMInstances.Create();
  Result.m_ClassIndex := m_ClassIndex;
  Result.m_RelationName := m_RelationName;
  Result.m_Attributes := m_Attributes;
  Result.m_Instances := TDMFastVector.Create( numInstances );
  for row := 0 to numInstances - 1 do
    Result.m_Instances.addElement( m_Instances.elementAt( row ) );
end;

{ Creates a new data set object that carries only the class index and the
  relation name of dataset. The attribute and instance vectors of the result
  are not assigned here. }
function TDMInstances.Cr_Att(dataset : TDMInstances): TDMInstances;
begin
  Result := TDMInstances.Create();
  Result.m_RelationName := dataset.m_RelationName;
  Result.m_ClassIndex := dataset.m_ClassIndex;
end;

{ Takes over the class index of another data set (no range check is made). }
procedure TDMInstances.setClassIndex_pr(dataset : TDMInstances);
begin
  Self.m_ClassIndex := dataset.classIndex;
end;

{ Takes over the relation name of another data set. }
procedure TDMInstances.setRelationName_pr(dataset : TDMInstances);
begin
  Self.m_RelationName := dataset.relationName;
end;

{ Replaces the attribute vector with a copy (copyObject) of the one held by
  dataset. The previously referenced vector is not freed here. }
procedure TDMInstances.setAttributes_pr(dataset : TDMInstances);
begin
  Self.m_Attributes := dataset.attributes().copyObject as TDMFastVector;
end;

{ Builds a data set from a slice of another one.
  source - supplies the header (class index, relation name, copied attributes)
  first  - index of the first instance to take from source
  toCopy - number of instances to take; also passed to the constructor of the
           instance vector
  The instances are added through copyInstances. }
constructor TDMInstances.Create( source : TDMInstances; const first, toCopy : Integer );
begin
  m_RelationName := source.m_RelationName;
  m_ClassIndex := source.m_ClassIndex;
  m_Attributes := source.m_Attributes.copyObject as TDMFastVector;
  m_Instances := TDMFastVector.Create( toCopy );
  source.copyInstances( first, Self, toCopy );
end;

{ Reads a complete data set from a stream: the header first (readHeader),
  then data rows until getInstance reports the end of the input. The class
  index is left unset (-1) and the instance vector is trimmed at the end.
  NOTE(review): the tokenizer created here is never freed. Whether it owns
  the reader stream is not visible from this unit, so it is left as is. }
constructor TDMInstances.Create(reader : TStream);
var
  tokenizer : TDMStreamTokenizer;
begin
  tokenizer := TDMStreamTokenizer.Create(reader);
  initTokenizer(tokenizer);
  readHeader(tokenizer);
  m_ClassIndex := -1;
  m_Instances := TDMFastVector.Create(1000);
  { one data row per call; False means the end of the input was reached }
  repeat
  until not getInstance(tokenizer, true);
  compactify();
end;

{ Frees the instance vector and the attribute vector, then runs the inherited
  destructor so that ancestor classes can release their own resources.
  A nil m_Attributes (see RemoveData) is handled by FreeAndNil. }
destructor TDMInstances.Destroy;
begin
  FreeAndNil(m_Instances);
  FreeAndNil(m_Attributes);
  inherited Destroy;
end;

{ Detaches this data set from its data without freeing it: the reference to
  the attribute vector is dropped (set to nil, not freed) and
  elementMemoryManagement(false) is called on the instance vector.
  NOTE(review): presumably meant to be called before Destroy on data sets
  made by CloneAttributes / CloneInstances, which share their attribute vector
  and instance objects with the source -- confirm against the callers. }
procedure TDMInstances.RemoveData;
begin
m_Attributes := nil;
m_Instances.elementMemoryManagement(false);
end;

{ Configures the tokenizer's character classes for reading the data file.
  The calls are order dependent: later calls re-classify characters that were
  covered by an earlier range.
  NOTE(review): the method names match java.io.StreamTokenizer; the exact
  semantics of TDMStreamTokenizer are assumed to be the same -- confirm. }
procedure TDMInstances.initTokenizer(var tokenizer : TDMStreamTokenizer);
begin
tokenizer.resetSyntax();
// character codes 0 .. ord(' ') are whitespace
tokenizer.whitespaceChars(0, ord(' '));
// everything above the space character is a word character
// NOTE(review): the upper bound 256 is one past the last 8-bit code (255) -- confirm wordChars tolerates it
tokenizer.wordChars(ord(' ') + 1,256);
// the comma separates values, so it is treated as whitespace
tokenizer.whitespaceChars(ord(','), ord(','));
// '%' starts a comment
tokenizer.commentChar(ord('%'));
// both double and single quotes delimit quoted tokens
tokenizer.quoteChar(ord('"'));
tokenizer.quoteChar(ord(''''));
// braces are returned as tokens of their own (used around nominal value lists)
tokenizer.ordinaryChar(ord('{'));
tokenizer.ordinaryChar(ord('}'));
end;

{ Raises EIOException. The message is theMsg followed by the tokenizer's own
  description of what was read last (tokenizer.toString). Never returns
  normally. }
procedure TDMInstances.errms(tokenizer : TDMStreamTokenizer; theMsg : AnsiString);
begin
  raise EIOException.Create(
    theMsg + ', прочитано ' + tokenizer.toString()
  );
end;

{ Reads the first token of a line, skipping any number of end-of-line tokens.
  Afterwards the token type is normalised:
    - a quoted token (single or double quote) is reported as TT_WORD;
    - the word '?' is reported with the type ord('?') (missing value).
  Fix: the single-quote case used to compare against ord('\') (a backslash),
  so a line starting with a single-quoted token kept the quote character as
  its type. getNextToken and initTokenizer both use the single quote. }
procedure TDMInstances.getFirstToken(var tokenizer : TDMStreamTokenizer);
begin
  { skip empty lines }
  while (tokenizer.nextToken() = StreamTokenizer.TT_EOL) do
  begin
  end;

  if ((tokenizer.ttype = ord('''')) or (tokenizer.ttype = ord('"'))) then
    tokenizer.ttype := StreamTokenizer.TT_WORD
  else if ((tokenizer.ttype = StreamTokenizer.TT_WORD) and (tokenizer.sval = '?')) then
    tokenizer.ttype := ord('?');
end;

{ Reads the token that must close a line. Accepts an end-of-line token, and
  also an end-of-file token when endOfFileOk is True; anything else is
  reported through errms (which raises EIOException). }
procedure TDMInstances.getLastToken(tokenizer : TDMStreamTokenizer; endOfFileOk : boolean );
begin
  if (tokenizer.nextToken() = StreamTokenizer.TT_EOL) then
    Exit;
  if (tokenizer.ttype = StreamTokenizer.TT_EOF) and endOfFileOk then
    Exit;
  errms(tokenizer,'не найден конец строки');
end;

{ Reads the next token inside a line. An end of line or an end of file at
  this point is an error (reported through errms). Otherwise the token type
  is normalised: quoted tokens become TT_WORD and the word '?' gets the type
  ord('?'). }
procedure TDMInstances.getNextToken(var tokenizer : TDMStreamTokenizer);
begin
  if (tokenizer.nextToken() = StreamTokenizer.TT_EOL) then
    errms(tokenizer,'неожиданный конец строки');

  if (tokenizer.ttype = StreamTokenizer.TT_EOF) then
  begin
    errms(tokenizer,'неожиданный конец файла');
    Exit;
  end;

  if ( (tokenizer.ttype = ord('''')) or (tokenizer.ttype = ord('"')) ) then
  begin
    tokenizer.ttype := StreamTokenizer.TT_WORD;
    Exit;
  end;

  if ((tokenizer.ttype = StreamTokenizer.TT_WORD) and (tokenizer.sval = ('?'))) then
    tokenizer.ttype := ord('?');
end;

{ Discards tokens up to the end of the current line, then pushes the
  terminating token back so the caller sees it again.
  Fix: the loop now also stops on end of file. Previously a last line without
  a line terminator made it spin forever, because the end-of-line token was
  never returned. }
procedure TDMInstances.readTillEOL(tokenizer : TDMStreamTokenizer);
begin
  while (tokenizer.nextToken() <> StreamTokenizer.TT_EOL) do
  begin
    if (tokenizer.ttype = StreamTokenizer.TT_EOF) then
      Break;
  end;
  tokenizer.pushFront();
end;

{ Parses one data row whose first token has already been read, and appends
  the resulting instance to this data set.
  tokenizer - positioned on the first value of the row
  flag      - when True, the row must be followed by an end of line (or by the
              end of the file)
  Returns True. Malformed input is reported through errms (EIOException),
  except a malformed number, which is reported with ShowMessage and stored as
  0 (best effort, as before).
  Changes against the previous version:
    - numbers are converted with the standard Val procedure instead of a
      hand-written split into integer and fractional digits; this fixes the
      lost sign for input such as "-5.", accepts exponent notation and very
      long fractions, and no longer indexes into an empty token;
    - the freshly built instance is handed over with add(..., false) instead
      of being copied by add and then leaked;
    - unused locals were removed. }
function TDMInstances.getInstanceFull(tokenizer : TDMStreamTokenizer; flag : boolean) : boolean;
var
  instance : TDMInstanceValues;
  index : integer;
  i : integer;
  newInstance : TDMInstance;

  { Converts a numeric token to a Double. Val always uses '.' as the decimal
    separator, independent of the regional settings. }
  function parseNumeric(const token : AnsiString) : Double;
  var
    errorPos : Integer;
  begin
    Val(token, Result, errorPos);
    if (errorPos <> 0) then
    begin
      ShowMessage('Ошибка в записи числа');
      Result := 0;
    end;
  end;

begin
  SetLength(instance, numAttributes());

  // Read a value for every attribute.
  for i := 0 to numAttributes() - 1 do
  begin
    // The first token was read by the caller; fetch the following ones here.
    if (i > 0) then getNextToken(tokenizer);

    // Is the value missing?
    if (tokenizer.ttype = ord('?')) then
      instance[i] := MISSING_VALUE
    else
    begin
      // Is the token a valid value at all?
      if (tokenizer.ttype <> StreamTokenizer.TT_WORD) then
        errms(tokenizer,'данное значение не может использоваться');

      case (attribute(i).attrType()) of
        ATTRIBUTE_TYPE_NOMINAL :
          begin
            // The value must have been declared in the header.
            index := attribute(i).indexOfValue(tokenizer.sval);
            if (index = -1) then
              errms(tokenizer,'номинальное значение не определено в заголовке');
            instance[i] := index;
          end;

        ATTRIBUTE_TYPE_NUMERIC :
          instance[i] := parseNumeric(tokenizer.sval);

        ATTRIBUTE_TYPE_STRING :
          instance[i] := attribute(i).addStringValue(tokenizer.sval);
      else
        errms(tokenizer,'неизвестный тип атрибута в столбце ' + IntToStr(i));
      end;
    end;
  end;

  if (flag) then getLastToken(tokenizer,true);

  // Build the instance and add it to the data set.
  newInstance := TDMInstance.Create(numAttributes());
  newInstance.setDataset(self);
  for i := 0 to numAttributes() - 1 do
    newInstance.setValue(i,instance[i]);

  // The data set is already assigned, so the object itself is stored.
  add(newInstance, false);
  result := true;
end;

{ Reads the next data row, if there is one.
  Raises (through errms) when no attributes have been declared.
  Returns False when the end of the file is reached before a row starts;
  otherwise returns the result of getInstanceFull. }
function TDMInstances.getInstance(tokenizer : TDMStreamTokenizer; flag : boolean ) : boolean;
begin
  // Attributes must have been declared before any data can be read.
  if (m_Attributes.size() = 0) then
    errms(tokenizer,'не описаны атрибуты');

  getFirstToken(tokenizer);
  if (tokenizer.ttype <> StreamTokenizer.TT_EOF) then
    result := getInstanceFull(tokenizer, flag)   // parse the row
  else
    result := false;                             // end of file reached
end;

{ Reads the header of the data file: the relation name, then the attribute
  declarations up to the data keyword. Fills m_RelationName and creates
  m_Attributes. Every problem is reported through errms (EIOException).
  Changes against the previous version:
    - an end of file inside a nominal value list "{ ... }" is now reported;
      before, the loop kept adding values forever because only the end of
      line was checked;
    - two local buffers that were sized but never used, and two unused
      locals, were removed. }
procedure TDMInstances.readHeader(tokenizer : TDMStreamTokenizer);
var
  attributeName : AnsiString;
  attributeValues : TDMFastVector;
begin
  // read the relation name
  getFirstToken(tokenizer);
  if (tokenizer.ttype = StreamTokenizer.TT_EOF) then
    errms(tokenizer,'неожиданный конец файла');
  if (Utils.equalsIgnorCase(ARFF_RELATION,tokenizer.sval)) then
  begin
    getNextToken(tokenizer);
    m_RelationName := tokenizer.sval;
    getLastToken(tokenizer,false);
  end else
    errms(tokenizer,'ключевое слово ' + ARFF_RELATION + ' не найдено');

  m_Attributes := TDMFastVector.Create();

  // read the attribute declarations
  getFirstToken(tokenizer);
  if (tokenizer.ttype = StreamTokenizer.TT_EOF) then
    errms(tokenizer,'неожиданный конец файла');

  while (Utils.equalsIgnorCase(ARFF_ATTRIBUTE, tokenizer.sval)) do
  begin
    // attribute name, then its type or value list
    getNextToken(tokenizer);
    attributeName := tokenizer.sval;
    getNextToken(tokenizer);

    if (tokenizer.ttype = StreamTokenizer.TT_WORD) then
    begin
      // a type keyword: real, integer, numeric or string
      if ((Utils.equalsIgnorCase(ARFF_ATTRIBUTE_REAL, tokenizer.sval)) or
          (Utils.equalsIgnorCase(ARFF_ATTRIBUTE_INTEGER, tokenizer.sval)) or
          (Utils.equalsIgnorCase(ARFF_ATTRIBUTE_NUMERIC, tokenizer.sval))) then
      begin
        m_Attributes.addElement(TDMAttribute.Create(attributeName, numAttributes() ) );
      end else
      if (Utils.equalsIgnorCase(ARFF_ATTRIBUTE_STRING, tokenizer.sval)) then
      begin
        m_Attributes.
          addElement(TDMAttribute.Create(attributeName, nil, numAttributes()));
      end else
      begin
        errms(tokenizer,'несоответствие типа атрибута ');
      end
    end else
    begin
      // the attribute is nominal
      attributeValues := TDMFastVector.Create();

      // the value list must start with '{'
      if (tokenizer.ttype <> ord('{')) then
        errms(tokenizer,'{ дожна быть перед перечислением');

      while (tokenizer.nextToken() <> ord('}')) do
      begin
        // the list must be closed on the same line and before the file ends
        if (tokenizer.ttype = StreamTokenizer.TT_EOL) or
           (tokenizer.ttype = StreamTokenizer.TT_EOF) then
          errms(tokenizer,'} необходима при перечислении')
        else
          attributeValues.addElement(TDMNominalAttributeValue.Create(tokenizer.sval));
      end;
      if (attributeValues.size() = 0) then
        errms(tokenizer,'отсутствуют номинальные значения');

      m_Attributes.
        addElement(TDMAttribute.Create(attributeName, attributeValues, numAttributes()));
    end;
    getLastToken(tokenizer,false);
    getFirstToken(tokenizer);
    if (tokenizer.ttype = StreamTokenizer.TT_EOF) then
      errms(tokenizer,'неожиданный конец файла');
  end;

  // the data section must follow
  if (not (Utils.equalsIgnorCase(ARFF_DATA, tokenizer.sval))) then
    errms(tokenizer,'ключевое слово ' + ARFF_DATA + ' требуется');

  // at least one attribute must have been declared
  if (m_Attributes.size() = 0) then
    errms(tokenizer,'no attributes declared');
end;

{ Returns the stored relation name. }
function TDMInstances.relationName : AnsiString;
begin
  Result := Self.m_RelationName;
end;

{ Returns the attribute at the given position, or nil when index lies outside
  0 .. number of attributes - 1. }
function TDMInstances.attribute( const index : Integer ) : TDMAttribute;
begin
  if ( index < 0 ) or ( index >= m_Attributes.size ) then
    Result := nil
  else
    Result := m_Attributes.elementAt( index ) as TDMAttribute;
end;

{ Returns the first attribute whose name equals the given name (compared with
  the = operator), or nil when there is none. }
function TDMInstances.attribute( const name : AnsiString ) : TDMAttribute;
var
  pos : Integer;
  candidate : TDMAttribute;
begin
  Result := nil;
  for pos := 0 to m_Attributes.size - 1 do
  begin
    candidate := attribute( pos );
    if ( candidate.name = name ) then
    begin
      Result := candidate;
      Exit;
    end;
  end;
end;

{ Returns the attribute vector itself (not a copy). }
function TDMInstances.attributes() : TDMFastVector;
begin
  Result := Self.m_Attributes;
end;

{ Inserts a copy of attribute at the given position.
  Does nothing when position is outside 0 .. number of attributes - 1, so
  appending after the last attribute is not possible through this method.
  The attributes behind the new one get their index increased by one, every
  instance is told to insert a slot at the same position, and the class index
  is shifted when it lies at or behind the insertion point. }
procedure TDMInstances.insertAttributeAt( attribute : TDMAttribute; const position : Integer );
var
  inserted, shifted : TDMAttribute;
  idx : Integer;
begin
  if ( position < 0 ) or ( position >= m_Attributes.size ) then
    Exit;

  inserted := attribute.copyObject as TDMAttribute;
  inserted.setIndex( position );
  m_Attributes.insertElementAt( inserted, position );

  { renumber the attributes that moved one place to the right }
  for idx := position + 1 to m_Attributes.size - 1 do
  begin
    shifted := m_Attributes.elementAt( idx ) as TDMAttribute;
    shifted.setIndex( shifted.index + 1 );
  end;

  for idx := 0 to m_Instances.size - 1 do
    instance( idx ).insertAttributeAt( position );

  if ( m_ClassIndex >= position ) then
    Inc( m_ClassIndex );
end;

{ Returns the class attribute, or nil while the class index is negative. }
function TDMInstances.classAttribute : TDMAttribute;
begin
  if ( m_ClassIndex < 0 ) then
    Result := nil
  else
    Result := m_Attributes.elementAt( m_ClassIndex ) as TDMAttribute;
end;

{ Returns the stored index of the class attribute. }
function TDMInstances.classIndex : Integer;
begin
  Result := Self.m_ClassIndex;
end;

{ Returns the number of class labels:
    -1 while the class index is negative,
    the number of values for a nominal class attribute,
    1 for any other class attribute. }
function TDMInstances.numClasses : Integer;
var
  target : TDMAttribute;
begin
  if ( m_classIndex < 0 ) then
  begin
    Result := -1;
    Exit;
  end;

  target := m_Attributes.elementAt( m_classIndex ) as TDMAttribute;
  if ( not target.isNominal ) then
    Result := 1
  else
    Result := target.numValues;
end;

{ Returns the size of the attribute vector. }
function TDMInstances.numAttributes : Integer;
begin
  Result := Self.m_Attributes.size;
end;

{ Returns the size of the instance vector. }
function TDMInstances.numInstances : Integer;
begin
  Result := Self.m_Instances.size;
end;

{ Returns the test set for one fold of a cross-validation.
  numFolds - number of folds; must be at least 2 and not greater than the
             number of instances, otherwise EIllegalArgumentException is raised
  numFold  - zero-based number of the fold (not range checked here)
  The first (numInstances mod numFolds) folds receive one extra instance.
  The returned data set holds copies of the selected instances. }
function TDMInstances.testCV(numFolds : Integer; numFold : Integer) : TDMInstances;
var
  foldSize, startAt, shift, remainder : Integer;
begin
  if (numFolds < 2) then
    raise EIllegalArgumentException.Create('Number of folds must be at least 2!');
  if (numFolds > numInstances) then
    raise EIllegalArgumentException.Create('Cant have more folds than instances!');

  remainder := numInstances mod numFolds;
  foldSize := numInstances div numFolds;
  if (numFold < remainder) then
  begin
    Inc(foldSize);
    shift := numFold;
  end
  else
    shift := remainder;

  Result := TDMInstances.Create(Self, foldSize);
  startAt := numFold * (numInstances div numFolds) + shift;
  copyInstances(startAt, Result, foldSize);
end;

{ Shuffles the instances in place: walking from the last position down to 1,
  each element is swapped with one at a random position in 0 .. its own.
  The parameter r is not used; the global Random function supplies the
  random numbers. }
procedure TDMInstances.randomiz(r : Integer);
var
  last : Integer;
begin
  for last := numInstances - 1 downto 1 do
    swap(last, Random(last + 1));
end;

{ Exchanges the instances at positions i and j (delegates to the vector). }
procedure TDMInstances.swap(i, j : Integer);
begin
  Self.m_Instances.swap(i, j);
end;

{ Returns the training set for one fold of a cross-validation: all instances
  except those of the fold's test block, in shuffled order.
  numFolds - number of folds; must be at least 2 and not greater than the
             number of instances, otherwise EIllegalArgumentException is raised
  numFold  - zero-based number of the fold (not range checked here)
  r        - passed on to randomiz
  Side effect (kept from the previous version): three snapshots of the
  training set are appended to the text file 'out_EM_NumClust.temp' in the
  current directory.
  NOTE(review): this looks like leftover debug output -- consider removing it.
  Changes against the previous version:
    - the log file is created when it does not exist yet (Append alone fails
      on a missing file);
    - the file is closed even when an exception occurs;
    - the new data set is freed when an exception occurs;
    - the unreachable "exit" statements after "raise" were removed. }
function TDMInstances.trainCV(numFolds : Integer; numFold : Integer; r : Integer{TDMRnd}) : TDMInstances;
const
  DEBUG_LOG_NAME = 'out_EM_NumClust.temp';
var
  train : TDMInstances;
  numInstForFold, first, offset : Integer;
  f : TextFile;
begin
  if (numFolds < 2) then
    raise EIllegalArgumentException.Create('Number of folds must be at least 2!');
  if (numFolds > numInstances) then
    raise EIllegalArgumentException.Create('Cant have more folds than instances!');

  { size and start of the block that is left out (the test block) }
  numInstForFold := numInstances div numFolds;
  if (numFold < numInstances mod numFolds) then
  begin
    Inc(numInstForFold);
    offset := numFold;
  end
  else
    offset := numInstances mod numFolds;
  first := numFold * (numInstances div numFolds) + offset;

  train := TDMInstances.Create(Self, numInstances - numInstForFold);
  try
    { everything in front of the test block }
    copyInstances(0, train, first);

    AssignFile(f, DEBUG_LOG_NAME);
    if FileExists(DEBUG_LOG_NAME) then
      Append(f)
    else
      Rewrite(f);
    try
      writeln(f);
      writeln(f, 'train1');
      writeln(f, train.toString);

      { everything behind the test block }
      copyInstances(first + numInstForFold, train, numInstances - first - numInstForFold);

      writeln(f);
      writeln(f, 'train2');
      writeln(f, train.toString);

      train.randomiz(r);

      writeln(f);
      writeln(f, 'train3');
      writeln(f, train.toString);
    finally
      CloseFile(f);
    end;
  except
    train.Free;
    raise;
  end;

  Result := train;
end;

{ Adds num consecutive instances of this data set, starting at position from,
  to dest. dest.add is called with its default flag, so dest receives copies. }
procedure TDMInstances.copyInstances( const from : Integer; dest : TDMInstances; const num : Integer );
var
  offset : Integer;
begin
  for offset := 0 to num - 1 do
    dest.add( instance( from + offset ) );
end;

{ Returns the values of one attribute for all instances, in instance order.
  The result has one element per instance. }
function TDMInstances.attributeToDoubleArray( const index : Integer ) : TDMInstanceValues;
var
  row : Integer;
  column : TDMInstanceValues;
begin
  SetLength( column, m_Instances.size );
  for row := 0 to m_Instances.size - 1 do
    column[ row ] := instance( row ).value( index );
  Result := column;
end;

{ Collects statistics for one attribute and returns them in a new
  TDMAttributeStats object (the caller receives the object).
  The instances are visited in the order given by Utils.sort on the attribute
  values. Runs of equal values are passed to addDistinct; the first missing
  value ends the walk and all remaining instances are counted as missing.
  NOTE(review): this relies on Utils.sort placing missing values last --
  confirm against Utils. }
function TDMInstances.attributeStats( const index : Integer ) : TDMAttributeStats;
var
  row : TDMInstance;
  summary : TDMAttributeStats;
  column : DArray;
  order : IArray;
  pos, runLength : Integer;
  runValue : Double;
begin
  summary := TDMAttributeStats.Create;
  if ( attribute( index ).isNominal ) then
    SetLength( summary.nominalCounts, attribute( index ).numValues );

  summary.totalCount := m_Instances.size;

  column := DArray( attributeToDoubleArray( index ) );
  order := Utils.sort( column );

  { the first "run" is an empty one for the missing-value marker }
  runValue := missingValue;
  runLength := 0;
  for pos := 0 to m_Instances.size - 1 do
  begin
    row := instance( order[ pos ] );
    if ( row.isMissing( index ) ) then
    begin
      summary.missingCount := m_Instances.size - pos;
      Break;
    end;

    if ( row.value( index ) <> runValue ) then
    begin
      { a new value starts: report the finished run }
      summary.addDistinct( runValue, runLength );
      runLength := 1;
      runValue := row.value( index );
    end
    else
      Inc( runLength );
  end;
  summary.addDistinct( runValue, runLength );

  { the missing-value marker is not counted as a value }
  summary.distinctCount := summary.distinctCount - 1;
  Result := summary;
end;

{ Returns the sum of the weights of all instances (0 for an empty set). }
function TDMInstances.sumOfWeights : Double;
var
  row : Integer;
  total : Double;
begin
  total := 0;
  for row := 0 to m_Instances.size - 1 do
    total := total + instance( row ).weight;
  Result := total;
end;

{ Removes all instances from the data set. Element memory management is
  switched on for the duration of removeAllElements and switched off again
  afterwards.
  NOTE(review): presumably this makes the vector free the removed instance
  objects -- confirm against TDMFastVector. }
procedure TDMInstances.delete;
begin
m_Instances.elementMemoryManagement( True );
m_Instances.removeAllElements;
m_Instances.elementMemoryManagement( False );
end;

{ Appends an instance to the data set.
  flagCopy = True  : a copy (copyObject) is made, bound to this data set with
                     setDataset, and stored; the argument stays with the caller.
  flagCopy = False : the given object itself is stored; setDataset is not
                     called on it. }
procedure TDMInstances.add( instance : TDMInstance ; flagCopy : boolean);
var
  duplicate : TDMInstance;
begin
  if (not flagCopy) then
  begin
    m_Instances.addElement( instance );
    Exit;
  end;

  duplicate := instance.copyObject as TDMInstance;
  duplicate.setDataset( Self );
  m_Instances.addElement( duplicate );
end;

{ Returns the instance stored at the given position of the instance vector. }
function TDMInstances.instance( const index : Integer ) : TDMInstance;
begin
  Result := Self.m_Instances.elementAt( index ) as TDMInstance;
end;

{ Returns the first element of the instance vector. }
function TDMInstances.firstInstance : TDMInstance;
begin
  Result := Self.m_Instances.firstElement as TDMInstance;
end;

{ Returns the last element of the instance vector. }
function TDMInstances.lastInstance : TDMInstance;
begin
  Result := Self.m_Instances.lastElement as TDMInstance;
end;

{ Returns True when at least one attribute reports isString. }
function TDMInstances.checkForStringAttributes : Boolean;
var
  pos : Integer;
begin
  for pos := 0 to m_Attributes.size - 1 do
    if ( attribute( pos ).isString ) then
    begin
      Result := True;
      Exit;
    end;
  Result := False;
end;

{ Checks whether an instance fits this data set.
  Returns False when the number of attributes differs, or when a non-missing
  value of a nominal or string attribute is not a whole number within
  0 .. number of values - 1. Numeric attributes are not checked. }
function TDMInstances.checkInstance( instance : TDMInstance ) : Boolean;
var
  col, whole : Integer;
begin
  Result := ( instance.numAttributes = m_Attributes.size );
  if ( not Result ) then
    Exit;

  for col := 0 to m_Attributes.size - 1 do
  begin
    if ( instance.isMissing( col ) ) then
      Continue;
    if not ( ( attribute( col ).isNominal ) or ( attribute( col ).isString ) ) then
      Continue;

    { the value is an index into the attribute's value list }
    whole := Trunc( instance.value( col ) );
    if ( whole <> instance.value( col ) ) or
       ( instance.value( col ) < 0 ) or
       ( instance.value( col ) >= attribute( col ).numValues ) then
      Result := false;
  end;
end;

{ Creates and returns a new attribute enumeration for this data set. }
function TDMInstances.enumerateAttributes : TDMAttributeEnumeration;
var
  walker : TDMAttributeEnumeration;
begin
  walker := TDMAttributeEnumeration.Create( Self );
  Result := walker;
end;

{ Creates and returns a new instance enumeration for this data set. }
function TDMInstances.enumerateInstances : TDMInstanceEnumeration;
var
  walker : TDMInstanceEnumeration;
begin
  walker := TDMInstanceEnumeration.Create( Self );
  Result := walker;
end;


{ Compares the header of this data set with that of another one.
  Returns True when both have the same class index, the same number of
  attributes, and every pair of attributes at the same position is equal
  according to TDMAttribute.equals.
  Fix: the comparison loop used to run from 0 to the attribute count
  inclusive. For two headers that were equal it went one past the last
  attribute, where attribute() returns nil, and called equals on nil. }
function TDMInstances.equalHeaders( dataset : TDMInstances ) : Boolean;
var
  i : Integer;
begin
  Result := false;

  if ( m_ClassIndex <> dataset.m_ClassIndex ) then
    Exit;
  if ( m_Attributes.size <> dataset.m_Attributes.size ) then
    Exit;

  for i := 0 to m_Attributes.size - 1 do
    if ( not ( attribute( i ).equals( dataset.attribute( i ) ) ) ) then
      Exit;

  Result := true;
end;

{ Override with an empty body: nothing is written to ostream. }
procedure TDMInstances.serializeObject( ostream : TStream );
begin
end;

{ Override with an empty body: nothing is read from istream. }
procedure TDMInstances.deserializeObject( istream : TStream );
begin
end;

{ Returns the whole data set as text: the relation line, one line per
  attribute, the data keyword and one line per instance.
  The relation name goes through Utils.simpleQuote.
  NOTE(review): lines end with #10#13 (LF then CR), the reverse of the usual
  CR LF pair -- confirm whether consumers depend on this before changing it. }
function TDMInstances.toString : AnsiString;
var
  pos : Integer;
  text : AnsiString;
begin
  text := ARFF_RELATION + ' ' + Utils.simpleQuote( m_RelationName ) + ''#10#13''#10#13'';

  for pos := 0 to m_Attributes.size - 1 do
    text := text + attribute( pos ).toString + ''#10#13'';

  text := text + ''#10#13'' + ARFF_DATA + ''#10#13'';

  for pos := 0 to m_Instances.size - 1 do
    text := text + instance( pos ).toString + ''#10#13'';

  Result := text;
end;

{ Returns only the data rows as text, one instance per line, each line ended
  with #10#13. Returns an empty string for an empty data set. }
function TDMInstances.toStringI : AnsiString;
var
  row : Integer;
  text : AnsiString;
begin
  text := '';
  for row := 0 to m_Instances.size - 1 do
    text := text + instance( row ).toString + ''#10#13'';
  Result := text;
end;

{ Returns a new string list with one entry per attribute (its toString text)
  followed by the data keyword. The caller receives the list object. }
function TDMInstances.toStringTT : TStringList;
var
  pos : Integer;
begin
  Result := TStringList.Create();
  for pos := 0 to m_Attributes.size - 1 do
    Result.add( attribute( pos ).toString );
  Result.add( ARFF_DATA );
end;

{ Returns the data rows as text, each one prefixed with the number num[k][i]
  and a space, where i is the position of the instance. Lines end with #10. }
function TDMInstances.toStringII(num:I2IArray;k:integer) : AnsiString;
var
  row : Integer;
  text : AnsiString;
begin
  text := '';
  for row := 0 to m_Instances.size - 1 do
    text := text + InttoStr(num[k][row])+' ' +instance( row ).toString + ''#10'';
  Result := text;
end;

{ Drops every instance whose value for the given attribute is missing.
  A new vector is filled with the remaining instance objects and replaces
  m_Instances.
  NOTE(review): neither the old vector nor the dropped instances are freed
  here; whether that leaks depends on who owns them -- confirm. }
procedure TDMInstances.deleteWithMissing(attIndex : integer);
var
  kept : TDMFastVector;
  row : integer;
begin
  kept := TDMFastVector.Create(numInstances());
  for row := 0 to numInstances() - 1 do
    if (not instance(row).isMissing(attIndex)) then
      kept.addElement(instance(row));
  m_Instances := kept;
end;

{ Same as deleteWithMissing(attIndex), addressed by attribute object. }
procedure TDMInstances.deleteWithMissing(att : TDMAttribute);
var
  attIndex : integer;
begin
  attIndex := att.index();
  deleteWithMissing(attIndex);
end;

{ Counts the instances whose value for the given attribute is missing. }
function TDMInstances.attrMissingValues(attrIndex : integer):integer;
var
  row, missing : integer;
begin
  missing := 0;
  for row := 0 to numInstances() - 1 do
    if (instance(row).isMissing(attrIndex)) then
      Inc(missing);
  result := missing;
end;

{ Drops every instance whose class value is missing.
  Raises EUnassignedClassException while the class index is negative. }
procedure TDMInstances.deleteWithMissingClass();
begin
  if (m_ClassIndex >= 0) then
    deleteWithMissing(m_ClassIndex)
  else
    raise EUnassignedClassException.Create('Индекс класса отрицательный / не установлен');
end;

{ Sets the index of the class attribute.
  Raises EIllegalArgumentException when classIndex is not smaller than the
  number of attributes. Negative values are accepted as they are. }
procedure TDMInstances.setClassIndex(classIndex : integer);
begin
  if (classIndex < numAttributes()) then
    m_ClassIndex := classIndex
  else
    raise EIllegalArgumentException.Create('Неверный индекс класса: ' + IntToStr(classIndex));
end;

{ Same as sort(attIndex), addressed by attribute object. }
procedure TDMInstances.sort(att : TDMAttribute);
var
  attIndex : integer;
begin
  attIndex := att.index();
  sort(attIndex);
end;

{ Sorts the instances in ascending order of the given attribute.
  First all instances with a missing value for the attribute are moved to the
  end of the vector; then quickSort is run on the part in front of them. }
procedure TDMInstances.sort(attIndex : integer);
var
i,j : integer;
begin
// Move all instances with missing values to the end
j := numInstances() - 1;
i := 0;
while (i <= j) do
begin
if (instance(j).isMissing(attIndex)) then
begin
// position j already holds a missing value: shrink the range from the right
dec(j);
end else
begin
if (instance(i).isMissing(attIndex)) then
begin
// a missing value in the left part: exchange it with the non-missing one at j
swap(i,j);
dec(j);
end;
inc(i);
end;
end;
// 0 .. j now holds only non-missing values
quickSort(attIndex, 0, j);
end;

{ Recursive quicksort of the instances at positions lo0..hi0 (inclusive),
  ordering them ascending by the value of attribute attIndex.
  Does nothing when the range holds fewer than two positions. }
procedure TDMInstances.quickSort(attIndex : integer; lo0 : integer; hi0 : integer);
var
lo, hi : integer;
mid : double;
begin
lo := lo0;
hi := hi0;

if (hi0 > lo0) then
begin
// Pick the partition element: the value of the instance in the middle
// of the range.
mid := instance((lo0 + hi0) div 2).value(attIndex);

while(lo <= hi) do
begin
// Find the first element that is greater than or equal to the
// partition element, scanning from the left index (never past hi0).
while ((instance(lo).value(attIndex) < mid) and (lo < hi0)) do inc(lo);

// Find the first element that is less than or equal to the
// partition element, scanning from the right index (never past lo0).
while ((instance(hi).value(attIndex) > mid) and (hi > lo0)) do dec(hi);

// If the indices have not crossed, swap the two elements.
if(lo <= hi) then
begin
swap(lo,hi);
inc(lo);
dec(hi);
end;
end;
// If the right index has not reached the left edge of the range,
// the left partition still has to be sorted.
if(lo0 < hi) then quickSort(attIndex,lo0,hi);
// If the left index has not reached the right edge of the range,
// the right partition still has to be sorted.
if(lo < hi0) then quickSort(attIndex,lo,hi0);
end;
end;

{ Asks the instance vector to trim itself to its current size. }
procedure TDMInstances.compactify();
begin
  m_Instances.trimToSize();
end;

{ Returns the weighted mean (numeric attribute) or the index of the most
  frequent value (nominal attribute) for attribute attIndex.
  - Every value missing (or no instances): returns -NaN.
  - Numeric: sum(weight*value)/sum(weight) over non-missing values; 0 when
    the total weight is not positive.
  - Nominal: weights are truncated to integers before being accumulated.
  - Any other attribute type: 0. }
function TDMInstances.meanOrMode(attIndex:integer):double;
var
  weightedSum, weightTotal : double;
  tally : IArray;
  row, valueIdx : integer;
begin
  if attrMissingValues(attIndex) = numInstances then
  begin
    result := -Nan;
    exit;
  end;

  if attribute(attIndex).isNumeric() then
  begin
    weightedSum := 0;
    weightTotal := 0;
    for row := 0 to numInstances()-1 do
    begin
      if instance(row).isMissing(attIndex) then
        continue;
      weightTotal := weightTotal + instance(row).weight();
      weightedSum := weightedSum + instance(row).weight()*instance(row).value(attIndex);
    end;
    if (weightTotal <= 0) then
      Result := 0
    else
      Result := weightedSum/weightTotal;
  end
  else if attribute(attIndex).isNominal() then
  begin
    // New dynamic-array elements start at zero, so tally begins all-zero.
    SetLength(tally, attribute(attIndex).numValues());
    for row := 0 to numInstances()-1 do
    begin
      if instance(row).isMissing(attIndex) then
        continue;
      valueIdx := trunc(instance(row).value(attIndex));
      tally[valueIdx] := trunc(instance(row).weight) + tally[valueIdx];
    end;
    Result := Utils.maxIndex(tally);
  end
  else
    Result := 0;
end;

{ Weighted sample variance of numeric attribute attIndex over the instances
  whose value is not missing:
    (sum(w*x*x) - sum(w*x)^2 / sum(w)) / (sum(w) - 1)
  Returns 0 when the total weight is <= 1 or the formula yields a negative
  number. Raises EUnsupportedAttributeTypeException for non-numeric
  attributes. }
function TDMInstances.variance(attIndex:integer):double;
var
  total, totalSquared, weightTotal, estimate : double;
  w, x : double;
  row : integer;
  inst : TDMInstance;
begin
  total := 0;
  totalSquared := 0;
  weightTotal := 0;

  if not attribute(attIndex).isNumeric() then
    raise EUnsupportedAttributeTypeException.Create('Нельзя вычислить отклонение, так как атрибут не числовой!');

  for row := 0 to numInstances()-1 do
  begin
    inst := instance(row);
    if inst.isMissing(attIndex) then
      continue;
    w := inst.weight();
    x := inst.value(attIndex);
    total := total + w*x;
    totalSquared := totalSquared + w*x*x;
    weightTotal := weightTotal + w;
  end;

  if (weightTotal <= 1) then
    estimate := 0
  else
    estimate := ((totalSquared - (total * total / weightTotal))
      /(weightTotal - 1));

  // The variance must not be negative (guards against rounding error).
  if (estimate < 0) then
    Result := 0
  else
    Result := estimate;
end;

{ Returns the smallest non-missing value of numeric attribute attIndex.
  Returns -NaN when the dataset is empty or every value is missing, the same
  convention maxNumAttrValue uses; previously an empty dataset indexed
  sorted[0] out of range.
  Raises EIllegalType for non-numeric attributes.
  Assumes Utils.sort returns instance positions in ascending value order
  with missing values last, as maxNumAttrValue also relies on - TODO confirm. }
function TDMInstances.minNumAttrValue(attIndex : integer): double;
var
  attVals : TDMInstanceValues;
  sorted : IArray;
begin
  if not attribute(attIndex).isNumeric() then
    raise EIllegalType.Create('Не разрешено вычисление минимального значения для качественного аттрибута');

  // Nothing to rank: no instances at all, or only missing values.
  if attrMissingValues(attIndex) = numInstances() then
  begin
    result := -NaN;
    exit;
  end;

  attVals := attributeToDoubleArray(attIndex);
  sorted := Utils.sort(DArray(attVals));
  result := attVals[sorted[0]];
end;

{ Returns the largest non-missing value of numeric attribute attIndex.
  Returns -NaN when every value is missing (checked before the type test).
  Raises EIllegalType for non-numeric attributes.
  The element picked is the one just before the trailing block of missing
  values in the sorted order, so this presumably relies on Utils.sort placing
  missing values last - TODO confirm. }
function TDMInstances.maxNumAttrValue(attIndex : integer): double;
var
  attVals : TDMInstanceValues;
  order : IArray;
  missing : integer;
begin
  missing := attrMissingValues(attIndex);
  if missing = numInstances then
  begin
    result := -NAN;
    exit;
  end;

  if not attribute(attIndex).isNumeric() then
    raise EIllegalType.Create('Не разрешено вычисление максимального значения для качественного аттрибута');

  attVals := attributeToDoubleArray(attIndex);
  order := Utils.sort(DArray(attVals));
  result := attVals[order[length(order)-1-missing]];
end;

{ Standard deviation (Math.StdDev) of the non-missing values of numeric
  attribute attIndex.
  - Every value missing: -NaN.
  - Exactly one non-missing value: 0.
  Both shortcuts are taken before the attribute type is checked.
  Raises EIllegalType for non-numeric attributes.
  When some values are missing, the first (count - missing) entries of the
  sorted order are used, which presumably relies on Utils.sort placing
  missing values last - TODO confirm. }
function TDMInstances.stdDeviation(attIndex : integer): double;
var
  attVals : TDMInstanceValues;
  order : IArray;
  present : array of double;
  k, missing : integer;
begin
  missing := attrMissingValues(attIndex);
  if missing = numInstances then
  begin
    result := -Nan;
    exit;
  end;
  if missing = numInstances-1 then
  begin
    result := 0;
    exit;
  end;

  if not attribute(attIndex).isNumeric() then
    raise EIllegalType.Create('Не разрешено вычисление ско для качественного аттрибута');

  attVals := attributeToDoubleArray(attIndex);
  if missing = 0 then
  begin
    result := StdDev(attVals);
    exit;
  end;

  // Copy only the non-missing values before handing them to StdDev.
  order := Utils.sort(DArray(attVals));
  SetLength(present, length(attVals) - missing);
  for k := 0 to length(present)-1 do
    present[k] := attVals[order[k]];
  result := StdDev(present);
end;

{ Number of different values that actually occur for attribute attIndex.
  - Numeric: walks the instances in sorted order and counts every increase
    in value, stopping at the first missing value.
  - Otherwise: the number of declared attribute values minus those for which
    instForAttrValue reports no instance. }
function TDMInstances.numDistinctValues(attIndex : integer) : integer;
var
  attVals : TDMInstanceValues;
  order : IArray;
  lastSeen : double;
  distinct : integer;
  k, lastPos : integer;
  inst : TDMInstance;
begin
  result := 0;

  if not attribute(attIndex).isNumeric() then
  begin
    result := attribute(attIndex).numValues();
    for k := 0 to attribute(attIndex).numValues()-1 do
      if instForAttrVAlue(attindex, k) = 0 then
        dec(result);
    exit;
  end;

  attVals := attributeToDoubleArray(attIndex);
  order := Utils.Sort(DArray(attVals));
  lastSeen := 0;
  distinct := 0;
  lastPos := length(order)-1-attrMissingValues(attIndex);
  for k := 0 to lastPos do
  begin
    inst := instance(order[k]);
    if inst.isMissing(attIndex) then
      break;
    // The first value always counts; later ones only when they grow.
    if (k = 0) or (inst.value(attIndex) > lastSeen) then
    begin
      lastSeen := inst.value(attIndex);
      inc(distinct);
    end;
  end;
  result := distinct;
end;

{ Number of values of attribute attIndex that occur in exactly one instance.
  - No non-missing value: 0. Exactly one non-missing value: 1 (both decided
    before the attribute type is looked at).
  - Numeric: the non-missing values are visited in sorted order and a value
    counts when it differs from both of its neighbours. This also covers the
    case of exactly two non-missing values, which previously reported 1 for
    two equal values instead of 0.
  - Nominal: counts the declared values that instForAttrValue finds exactly
    once.
  - Any other attribute type: 0.
  Assumes Utils.sort orders ascending with missing values last, as the other
  statistics in this unit do - TODO confirm. }
function TDMInstances.numUniqueValues(attIndex : integer) :integer;
var
  attVals : TDMInstanceValues;
  sorted : IArray;
  present : integer;
  i : integer;
  cur : double;
  differsFromPrev, differsFromNext : boolean;
begin
  result := 0;

  present := numInstances() - attrMissingValues(attIndex);
  if present <= 0 then exit;
  if present = 1 then
  begin
    result := 1;
    exit;
  end;

  if attribute(attIndex).isNumeric() then
  begin
    attVals := attributeToDoubleArray(attIndex);
    sorted := Utils.sort(DArray(attVals));
    for i := 0 to present - 1 do
    begin
      // Defensive: never compare a missing (NaN) value.
      if instance(sorted[i]).isMissing(attIndex) then break;
      cur := instance(sorted[i]).value(attIndex);

      differsFromPrev := true;
      if i > 0 then
        differsFromPrev := cur <> instance(sorted[i - 1]).value(attIndex);

      differsFromNext := true;
      if (i < present - 1) and
         (not instance(sorted[i + 1]).isMissing(attIndex)) then
        differsFromNext := cur <> instance(sorted[i + 1]).value(attIndex);

      if differsFromPrev and differsFromNext then
        inc(result);
    end;
  end
  else
  if attribute(attIndex).isNominal() then
  begin
    for i := 0 to attribute(attIndex).numValues() - 1 do
      if instForAttrValue(attIndex, i) = 1 then
        inc(result);
  end;
end;

{ Counts the instances whose string value for attribute attrIndex equals the
  attribute's declared value number valueIndex. Instances with a missing
  value are skipped. }
function TDMInstances.instForAttrValue(attrIndex : integer; valueIndex: integer) : integer;
var
  row, lastRow : integer;
begin
  result := 0;
  lastRow := numInstances() - 1;
  for row := 0 to lastRow do
  begin
    if not instance(row).isMissing(attrIndex) then
      if instance(row).stringValue(attrIndex) = attribute(attrIndex).value(valueIndex) then
        inc(result);
  end;
end;

{ Counts the instances that carry declared value valueIndex for attribute
  attrIndex and, at the same time, declared value dvalueIndex for attribute
  dattrIndex.
  Instances with a missing value in either attribute are skipped. The
  original code left the loop at the first such instance, so the count
  depended on instance order; instForAttrValue skips them instead. }
function TDMInstances.instFor2AttrValue(attrIndex : integer; valueIndex: integer;
  dattrIndex : integer; dvalueIndex: integer ) : integer;
var
  n1 : integer;
  j : integer;
begin
  result := 0;
  n1 := numInstances() - 1;
  for j := 0 to n1 do
  begin
    if (instance(j).isMissing(attrIndex)) then continue;
    if (instance(j).isMissing(dattrIndex)) then continue;
    if (instance(j).stringValue(attrIndex) = attribute(attrIndex).value(valueIndex)) and
       (instance(j).stringValue(dattrIndex) = attribute(dattrIndex).value(dvalueIndex)) then
      inc(result);
  end;
end;

{ Counts the instances i for which arr[i] falls into interval number
  intNumber and whose string value for attribute attrIndex equals value.
  Interval 0 starts at the minimum of arr and each interval is step wide;
  the bounds are reached by repeated addition of step. The interval is
  closed on the left and open on the right.
  arr is indexed by instance position, so it must have at least
  numInstances() elements. }
function TDMInstances.instForAttrValueInInterval(arr : DArray; attrIndex : integer; value :AnsiString; step : double; intNumber : integer):integer;
var
  lowBound, highBound : double;
  row, shift : integer;
begin
  result := 0;
  lowBound := arr[minIndex(arr)];
  highBound := lowBound + step;

  // Slide the window forward intNumber times.
  for shift := 1 to intNumber do
  begin
    lowBound := highBound;
    highBound := highBound + step;
  end;

  for row := 0 to numInstances()-1 do
  begin
    if (arr[row] >= lowBound) and (arr[row] < highBound)
       and (instance(row).stringValue(attrIndex) = value) then
      inc(result);
  end;
end;

{ Returns the rounded arithmetic mean of attribute attrIndexY over the
  instances i for which arr[i] falls into interval number intNumber.
  Interval 0 starts at the minimum of arr and each interval is step wide
  (closed on the left, open on the right). Returns 0 when no instance
  qualifies.
  The original code averaged pairwise ((mean + y) / 2) and treated a running
  value of 0 as "not started", which is not the mean for more than two
  values. Instances with a missing Y value are now skipped so that NaN does
  not reach Round.
  attrIndexX is not used by the computation. }
function TDMInstances.instForAttrValueInInterval(arr : DArray; attrIndexX : integer; attrIndexY :integer;
  step : double; intNumber : integer):integer;
var
  curLow : double;
  curHi : double;
  i : integer;
  s : integer;
  sumY : double;
  countY : integer;
begin
  result := 0;
  curLow := arr[minIndex(arr)];
  curHi := curLow + step;

  for s := 1 to intNumber do
  begin
    curLow := curHi;
    curHi := curHi + step;
  end;

  sumY := 0;
  countY := 0;
  for i := 0 to numInstances()-1 do
  begin
    if (arr[i] >= curLow) and (arr[i] < curHi) then
      if not instance(i).isMissing(attrIndexY) then
      begin
        sumY := sumY + instance(i).value(attrIndexY);
        inc(countY);
      end;
  end;

  if countY > 0 then
    result := round(sumY / countY);
end;

{ Returns instance positions grouped for nominal attribute attIndex: first
  the positions whose string value equals attrValue, then the positions
  whose value is missing.
  The result always has numInstances() elements; slots beyond the collected
  positions keep their initial value 0.
  Raises EIllegalArgumentException when the attribute's attrType is not 2.
  The first pass used to stop at the first missing value, dropping every
  later match; missing values are now skipped there and collected by the
  second pass. }
function TDMInstances.sortInstancesByNomAttr(attIndex : integer; attrValue : AnsiString) : IArray;
var
  sortInst : IArray;
  t : integer;
  curPos : integer;
begin
  curPos := 0;
  SetLength(sortInst,numInstances());

  if attribute(attIndex).attrType <> 2 then
    raise EIllegalArgumentException.Create('Атрибут ' + attribute(attIndex).name + ' не является номинальным');

  // Pass 1: instances carrying the requested value.
  for t := 0 to numInstances()-1 do
  begin
    if (instance(t).isMissing(attIndex)) then continue;
    if instance(t).stringValue(attIndex) = attrValue then
    begin
      sortInst[curPos] := t;
      inc(curPos);
    end;
  end;

  // Pass 2: instances whose value is missing.
  for t := 0 to numInstances()-1 do
    if (instance(t).isMissing(attIndex)) then
    begin
      sortInst[curPos] := t;
      inc(curPos);
    end;

  result := sortInst;
end;

{ Writes value into attribute attIndex of every instance whose current value
  for that attribute is missing. Despite the name, missing values are the
  ones being replaced. }
procedure TDMInstances.replaceWithMissing(attIndex : integer;
  value : double);
var
  row : integer;
begin
  for row := 0 to numInstances() - 1 do
  begin
    if instance(row).isMissing(attIndex) then
      instance(row).m_AttValues[attIndex] := value;
  end;
end;

{ Creates a new dataset with this dataset's relation name, a copy of its
  attribute vector (as produced by CopyObject), capacity 0 and the same
  class index. No instances are transferred by this method. }
function TDMInstances.attrCopyStructure() : TDMInstances;
var
  attrCopy : TDMFastVector;
begin
  attrCopy := m_Attributes.CopyObject() as TDMFastVector;
  result := TDMInstances.Create(relationName(), attrCopy, 0);
  result.m_ClassIndex := m_ClassIndex;
end;

{ Derives an integer from seed and the dataset contents.
  Sets the global RandSeed to seed, picks an instance position with
  randomRange(0, numInstances()), hashes that instance's textual form with
  the nested hashCode and returns the hash plus seed.
  Despite the name, an integer is returned, not a generator object.
  NOTE(review): the sum in hashCode can exceed the Integer range for longer
  strings; the outcome then depends on the overflow-checking setting. }
function TDMInstances.getRandomNumberGenerator( seed : integer) : integer;
// Weighted sum of the character codes of strToHash. The weight of the first
// character is intPower(2, 23); the exponent drops by one per character
// until it reaches 0 and then stays there.
function hashCode(strToHash : AnsiString) : integer;
// Returns base multiplied by itself (exp - 1) times. For exp <= 1 the loop
// does not run and base itself is returned, so intPower(2, 0) yields 2.
function intPower(base, exp : integer) : integer;
var
pow : integer;
i : integer;
begin
pow:=base;
for i := 0 to exp - 2 do
pow:= pow * base;
result := pow;
end;

var
h : integer;
off : integer;
value : AnsiString;
len : integer;
i : integer;
intVal : integer;
degree : integer;
begin
h := 0;
off :=0;
degree := 23;

// Work on a copy of the input string.
value := Copy(strToHash,0,length(strToHash));
len:=length(strToHash);

i := 0;
while (i < len) do
begin
// off is advanced first, so characters are read at 1-based positions 1..len.
inc(off);
h := h + intPower(2,degree)*ord(value[off]);
if degree > 0 then dec(degree);
inc(i);
end;
result := h;
end;

var
curRes : double;
begin
RandSeed := seed;
result := hashCode(instance(randomRange(0,numInstances())).toString())
+ seed;
end;

{ Replaces the relation name of the dataset with newName. }
procedure TDMInstances.setRelationName(newName : AnsiString);
begin
  m_RelationName := newName;
end;

{ Copy constructor: takes the values, size and weight of instance. The new
  object is not attached to any dataset.
  The values are duplicated with Copy. Assigning the dynamic array directly,
  as the original did, only shares one array between both objects, so an
  in-place setValue on one of them silently changed the other. }
constructor TDMInstance.Create( instance : TDMInstance );
begin
  m_AttValues := Copy( instance.m_AttValues, 0, Length( instance.m_AttValues ) );
  m_Size := instance.m_Size;
  m_Weight := instance.m_Weight;
  m_Dataset := nil;
end;

{ Creates an instance from a weight and a value array. The array is stored
  by reference, not duplicated. m_Size is set to size only when size is
  positive; otherwise it is left untouched. No dataset is attached. }
constructor TDMInstance.Create( const weight : Double; attValues : TDMInstanceValues; const size : Integer );
begin
  m_DataSet := nil;
  m_Weight := weight;
  m_AttValues := attValues;
  if size > 0 then
    m_Size := size;
end;

{ Creates an instance with numAttributes values, all set to MISSING_VALUE,
  weight 1 and no dataset. When numAttributes is not positive none of the
  fields is assigned. }
constructor TDMInstance.Create( const numAttributes : Integer );
var
  slot, lastSlot : Integer;
begin
  if numAttributes <= 0 then
    exit;

  SetLength( m_AttValues, numAttributes );
  m_Size := numAttributes;
  m_Dataset := nil;
  m_Weight := 1;
  lastSlot := numAttributes - 1;
  for slot := 0 to lastSlot do
    m_AttValues[ slot ] := MISSING_VALUE;
end;

{ Creates an instance from a weight and a value array (stored by reference).
  No dataset is attached.
  m_Size is now set to the length of attValues. It used to stay at its
  initial 0, so numAttributes, toString and hasMissingValue saw an empty
  instance. }
constructor TDMInstance.Create( weight : double; attValues : TDMInstanceValues);
begin
  m_AttValues := attValues;
  m_Size := Length( attValues );
  m_Weight := weight;
  m_Dataset := nil;
end;

{ Creates an instance from a weight and a value array (stored by reference)
  and attaches it to dataset.
  m_Size is now set to the length of attValues. It used to stay at its
  initial 0, so numAttributes, toString and hasMissingValue saw an empty
  instance. }
constructor TDMInstance.Create( weight : double; attValues : TDMInstanceValues;
  dataset : TDMInstances);
begin
  m_AttValues := attValues;
  m_Size := Length( attValues );
  m_Weight := weight;
  m_Dataset := dataset;
end;

{ Removes the attribute at position from the dataset header and from every
  instance.
  Raises EIllegalArgumentException when position is out of range or refers
  to the class attribute. A class index above position is shifted down by
  one, as are the stored indices of all following attributes. }
procedure TDMInstances.deleteAttributeAt(position : integer);
var
  k : integer;
  shifted : TDMAttribute;
begin
  if (position < 0) or (position >= m_Attributes.size()) then
    raise EIllegalArgumentException.Create('Неверно задан индекс');

  if (position = m_ClassIndex) then
    raise EIllegalArgumentException.Create('невозможно удалить целевой атрибут');

  if (m_ClassIndex > position) then
    dec(m_ClassIndex);

  m_Attributes.removeAndClearElementAt(position);

  // Attributes that followed the removed one move one slot to the left.
  for k := position to m_Attributes.size() - 1 do
  begin
    shifted := m_Attributes.elementAt(k) as TDMAttribute;
    shifted.setIndex(shifted.index() - 1);
  end;

  for k := 0 to numInstances() - 1 do
    instance(k).forceDeleteAttributeAt(position);
end;

{ Returns the attribute with the given index from the attached dataset, or
  nil when the instance has no dataset. }
function TDMInstance.attribute( const index : Integer ) : TDMAttribute;
begin
  Result := nil;
  if m_Dataset <> nil then
    Result := m_Dataset.attribute( index );
end;

{ Returns the stored attribute count (m_Size). }
function TDMInstance.numAttributes : Integer;
begin
  Result := m_Size;
end;

{ Returns the weight of the instance. }
function TDMInstance.weight : double;
begin
  Result := m_Weight;
end;

{ Returns the stored attribute count (m_Size); same value as numAttributes. }
function TDMInstance.size : integer;
begin
  Result := m_Size;
end;

{ Replaces the weight of the instance. }
procedure TDMInstance.setWeight( const weight : Double );
begin
  m_Weight := weight;
end;

{ Returns the class attribute of the attached dataset, or nil when the
  instance has no dataset. }
function TDMInstance.classAttribute : TDMAttribute;
begin
  Result := nil;
  if m_Dataset <> nil then
    Result := m_Dataset.classAttribute;
end;

{ Returns the class index of the attached dataset, or -1 when the instance
  has no dataset. }
function TDMInstance.classIndex : Integer;
begin
  Result := -1;
  if m_Dataset <> nil then
    Result := m_Dataset.classIndex;
end;

{ Tells whether the value at the dataset's class index is missing.
  Returns false when the instance has no dataset.
  NOTE(review): the class index is used as-is, with no check that it has
  been set. }
function TDMInstance.classIsMissing : Boolean;
begin
  Result := false;
  if m_Dataset <> nil then
    Result := isMissing( m_Dataset.classIndex );
end;

{ Returns the number of classes reported by the attached dataset, or 0 when
  the instance has no dataset. }
function TDMInstance.numClasses : Integer;
begin
  if m_Dataset <> nil then
    Result := m_Dataset.numClasses
  else
    Result := 0;
end;

{ Tells whether the value stored at index is missing, i.e. is NaN. }
function TDMInstance.isMissing( const index : Integer ) : Boolean;
begin
  Result := IsNaN( m_AttValues[ index ] );
end;

{ Tells whether the value stored for the given attribute (looked up by the
  attribute's index) is missing, i.e. is NaN. }
function TDMInstance.isMissing( attribute : TDMAttribute ) : Boolean;
begin
  Result := IsNaN( m_AttValues[ attribute.index ] );
end;

{ Returns the constant used to mark a missing value (MISSING_VALUE). }
function missingValue : Double;
begin
  Result := MISSING_VALUE;
end;

{ Tells whether value marks a missing value, i.e. is NaN. }
function isMissingValue( const value : Double ) : Boolean;
begin
  Result := IsNaN( value );
end;

{ Returns the attribute enumeration of the attached dataset, or nil when the
  instance has no dataset. }
function TDMInstance.enumerateAttributes : TDMAttributeEnumeration;
begin
  Result := nil;
  if m_Dataset <> nil then
    Result := m_Dataset.enumerateAttributes;
end;

{ Stores value at position index of the value array, in place. }
procedure TDMInstance.setValue( const index : Integer; const value : Double );
begin
  m_AttValues[ index ] := value;
end;

{ Stores value at the position given by the attribute's index, in place. }
procedure TDMInstance.setValue( attribute : TDMAttribute; const value : Double );
begin
  m_AttValues[ attribute.index ] := value;
end;

{ Sets a nominal or string attribute, addressed by index, from its textual
  value. Does nothing when the instance has no dataset.
  The attribute is fetched from the dataset and the work is handed to the
  attribute-based overload, which applies the same rules: a known value
  stores its index, an unknown value is added for string attributes and
  ignored otherwise. }
procedure TDMInstance.setValue( const index : Integer; const value : AnsiString );
var
  att : TDMAttribute;
begin
  if m_Dataset = nil then
    exit;
  att := m_Dataset.attribute( index );
  setValue( att, value );
end;

{ Returns the raw value stored at position index. }
function TDMInstance.value( const index : Integer ) : Double;
begin
  Result := m_AttValues[ index ];
end;

{ Returns the raw value stored at the position given by the attribute's
  index. }
function TDMInstance.value( attribute : TDMAttribute ) : Double;
begin
  Result := m_AttValues[ attribute.index ];
end;

{ Returns the textual value of the attribute at index, resolved through the
  attached dataset; an empty string when the instance has no dataset. }
function TDMInstance.stringValue( const index : Integer ) : AnsiString;
begin
  if m_Dataset <> nil then
    Result := stringValue( m_Dataset.attribute( index ) )
  else
    Result := '';
end;

{ Returns the textual value of a nominal or string attribute: the stored
  number is rounded and used as an index into the attribute's values.
  Returns an empty string for every other attribute type.
  NOTE(review): a missing (NaN) value is passed to Round unchecked. }
function TDMInstance.stringValue( attribute : TDMAttribute ) : AnsiString;
var
  isTextual : Boolean;
begin
  Result := '';
  isTextual := ( attribute.attributeType = ATTRIBUTE_TYPE_NOMINAL ) or
               ( attribute.attributeType = ATTRIBUTE_TYPE_STRING );
  if isTextual then
    Result := attribute.value( Round( m_AttValues[ attribute.index ] ) );
end;

{ Sets a nominal or string attribute from its textual value.
  - Value known to the attribute: its index is stored.
  - Value unknown, string attribute: the value is added to the attribute and
    the result of addStringValue is stored.
  - Value unknown, nominal attribute, or any other attribute type: nothing
    happens. }
procedure TDMInstance.setValue( attribute : TDMAttribute; const value : AnsiString );
var
  known : Integer;
  stringTyped : Boolean;
begin
  stringTyped := attribute.isString;
  if not ( ( attribute.isNominal ) or ( stringTyped ) ) then
    exit;

  known := attribute.indexOfValue( value );
  if known >= 0 then
    m_AttValues[ attribute.index ] := known
  else if stringTyped then
    m_AttValues[ attribute.index ] := attribute.addStringValue( value );
end;

{ Marks the value at position index as missing. }
procedure TDMInstance.setMissing( const index : Integer );
begin
  m_AttValues[ index ] := MISSING_VALUE;
end;

{ Marks the value at the position given by the attribute's index as
  missing. }
procedure TDMInstance.setMissing( attribute : TDMAttribute );
begin
  m_AttValues[ attribute.index ] := MISSING_VALUE;
end;

{ Returns the value stored at the dataset's class index, or MISSING_VALUE
  when the instance has no dataset.
  NOTE(review): the class index is used as-is, with no check that it has
  been set. }
function TDMInstance.classValue : Double;
begin
  Result := MISSING_VALUE;
  if m_Dataset <> nil then
    Result := m_AttValues[ m_Dataset.classIndex ];
end;

{ Marks the value at the dataset's class index as missing. Does nothing when
  the instance has no dataset. }
procedure TDMInstance.setClassMissing;
begin
  if m_Dataset = nil then
    exit;
  m_AttValues[ m_Dataset.classIndex ] := MISSING_VALUE;
end;

{ Stores value at the dataset's class index. Does nothing when the instance
  has no dataset. }
procedure TDMInstance.setClassValue( const value : Double );
begin
  if m_Dataset = nil then
    exit;
  m_AttValues[ m_Dataset.classIndex ] := value;
end;

{ Attaches the instance to dataset (only the reference is stored). }
procedure TDMInstance.setDataset( dataset : TDMInstances );
begin
  m_Dataset := dataset;
end;

{ Returns the dataset the instance is attached to; nil when there is none. }
function TDMInstance.dataset : TDMInstances;
begin
  Result := m_Dataset;
end;

{ Compares the header of this instance's dataset with the header of the
  other instance's dataset by delegating to TDMInstances.equalHeaders.
  Returns false when this instance has no dataset. otherInstance itself must
  not be nil. }
function TDMInstance.equalHeaders( otherInstance : TDMInstance ) : Boolean;
begin
  Result := false;
  if m_Dataset <> nil then
    Result := m_Dataset.equalHeaders( otherInstance.m_Dataset );
end;

{ Tells whether any value other than the class value is missing.
  Returns false when the instance has no dataset, whatever the values are. }
function TDMInstance.hasMissingValue : Boolean;
var
  pos, lastPos, classPos : Integer;
begin
  Result := false;
  if m_Dataset = nil then
    exit;

  classPos := m_Dataset.classIndex;
  lastPos := m_Size - 1;
  for pos := 0 to lastPos do
  begin
    if ( pos <> classPos ) and ( isMissing( pos ) ) then
    begin
      Result := true;
      exit;
    end;
  end;
end;

{ Maps a position in the value array to an attribute index. The mapping is
  the identity here. }
function TDMInstance.index( const position : Integer ) : Integer;
begin
  Result := position;
end;

{ Returns a new instance holding this instance's values followed by the
  values of instance. The result has weight 1.0 and no dataset.
  Returns nil when instance is nil. }
function TDMInstance.mergeInstance( instance : TDMInstance ) : TDMInstance;
var
  merged : TDMInstance;
  k, lastOther, total : Integer;
begin
  Result := nil;
  if instance = nil then
    exit;

  total := instance.m_Size + m_Size;
  merged := TDMInstance.Create;
  // Start from a copy of our own values, then grow to the combined length.
  merged.m_AttValues := Copy( m_AttValues, 0, m_Size );
  SetLength( merged.m_AttValues, total );
  lastOther := instance.m_Size - 1;
  for k := 0 to lastOther do
    merged.m_AttValues[ k + m_Size ] := instance.m_AttValues[ k ];
  merged.m_Size := total;
  merged.m_Dataset := nil;
  merged.m_Weight := 1.0;
  Result := merged;
end;

{ Removes the value at position and shifts the following values one slot to
  the left. Does nothing when the instance has no dataset or position is
  outside 0..m_Size-1. }
procedure TDMInstance.deleteAttributeAt( const position : Integer );
var
  shrunk : TDMInstanceValues;
  k, newLen : Integer;
begin
  if m_Dataset = nil then
    exit;
  if ( position < 0 ) or ( position >= m_Size ) then
    exit;

  // Keep the part in front of position, then append the part behind it.
  shrunk := Copy( m_AttValues, 0, position );
  newLen := m_Size - 1;
  SetLength( shrunk, newLen );
  for k := position + 1 to newLen do
    shrunk[ k - 1 ] := m_AttValues[ k ];

  m_AttValues := shrunk;
  m_Size := m_Size - 1;
end;

{ Inserts a missing value at position and shifts the following values one
  slot to the right. Does nothing when the instance has no dataset or
  position is outside 0..m_Size.
  position = m_Size (append at the end) is now accepted; the previous guard
  (position < m_Size) silently ignored that request. }
procedure TDMInstance.insertAttributeAt( const position : Integer );
var
  newValues : TDMInstanceValues;
  i : Integer;
begin
  if ( m_Dataset <> nil ) then
    if ( position >= 0 ) and ( position <= m_Size ) then
    begin
      newValues := Copy( m_AttValues, 0, position );
      SetLength( newValues, m_Size + 1 );
      newValues[ position ] := MISSING_VALUE;
      // Empty range when appending (position = m_Size).
      for i := position + 1 to m_Size do
      begin
        newValues[ i ] := m_AttValues[ i - 1 ];
      end;
      m_AttValues := newValues;
      m_Size := m_Size + 1;
    end;
end;

{ Releases this instance's reference to its value array and then runs the
  inherited destructor, which the original omitted, so any cleanup done by
  the ancestor class was skipped. }
destructor TDMInstance.Destroy;
begin
  if ( m_AttValues <> nil ) then SetLength( m_AttValues, 0 );
  inherited Destroy;
end;

{ Replaces every missing value with the entry at the same position in
  replacement. Does nothing unless replacement has exactly m_Size
  elements. }
procedure TDMInstance.replaceMissingValues( replacement : TDMInstanceValues );
var
  pos, lastPos : Integer;
begin
  if Length( replacement ) <> m_Size then
    exit;

  lastPos := m_Size - 1;
  for pos := 0 to lastPos do
    if isMissing( pos ) then
      m_AttValues[ pos ] := replacement[ pos ];
end;

{ Removes the value at position without checking for an attached dataset,
  shifts the following values one slot to the left and decrements m_Size.
  position is not range-checked here; callers must pass a valid index.
  The original allocated a scratch array that was immediately overwritten by
  Copy and assigned nil right before the real assignment; both dead
  statements are removed. }
procedure TDMInstance.forceDeleteAttributeAt(position : integer);
var
  newValues : TDMInstanceValues;
  j, newLen : integer;
begin
  newLen := length(m_AttValues) - 1;

  // Values in front of position are kept as they are.
  newValues := TDMInstanceValues(copy(DArray(m_AttValues), 0, position));

  // Unless the last value is being removed, append the values behind it.
  if (position < newLen) then
  begin
    SetLength(newValues, newLen);
    for j := position to newLen - 1 do
      newValues[j] := m_AttValues[j+1];
  end;

  m_AttValues := newValues;
  m_size := m_size-1;
end;

{ Cloning is not provided for instances: always returns nil. }
function TDMInstance.cloneObject : TObject;
begin
  Result := nil;
end;

{ Returns a new instance built with the copy constructor from Self and
  attached to the same dataset as this instance. }
function TDMInstance.copyObject : TObject;
var
  duplicate : TDMInstance;
begin
  duplicate := TDMInstance.Create( Self );
  duplicate.m_Dataset := m_Dataset;
  Result := duplicate;
end;

{ Intentionally empty: nothing is written to ostream for an instance. }
procedure TDMInstance.serializeObject( ostream : TStream );
begin
end;

{ Intentionally empty: nothing is read from istream for an instance. }
procedure TDMInstance.deserializeObject( istream : TStream );
begin
end;

{ Renders the instance as a ', '-separated list of its m_Size values.
  - Missing value: '?'.
  - No dataset attached: FloatToStr of the raw value.
  - Nominal or string attribute: the quoted string value.
  - Numeric attribute: fixed notation, precision 5, 2 decimals.
  - Any other attribute type contributes an empty field. }
function TDMInstance.toString : AnsiString;
var
  pos, lastPos, kind : Integer;
  field : AnsiString;
begin
  Result := '';
  lastPos := m_Size - 1;
  for pos := 0 to lastPos do
  begin
    if pos <> 0 then
      Result := Result + ', ';

    field := '';
    if isMissing( pos ) then
      field := '?'
    else if m_Dataset = nil then
      field := FloatToStr( m_AttValues[ pos ] )
    else
    begin
      kind := attribute( pos ).attributeType;
      if ( kind = ATTRIBUTE_TYPE_NOMINAL ) or
         ( kind = ATTRIBUTE_TYPE_STRING ) then
        field := Utils.simpleQuote( stringValue( pos ) )
      else if kind = ATTRIBUTE_TYPE_NUMERIC then
        field := FloatToStrF( m_AttValues[ pos ],ffFixed,5,2);
    end;

    Result := Result + field;
  end;
end;

{ Renders the instance like the parameterless toString, but leaves out every
  position i for which ignoreAttr[i] is true.
  Only positions covered by both ignoreAttr and the instance are rendered.
  The original looped over the full length of ignoreAttr and could read past
  the value array when the mask was longer than the instance; its length
  check was a no-op. Positions beyond the end of a shorter mask are left
  out, as before. }
function TDMInstance.toString( ignoreAttr:BoolArray) : AnsiString;
var
  i, count, atype : Integer;
begin
  Result := '';

  count := length(ignoreAttr);
  if count > m_Size then
    count := m_Size;

  for i := 0 to count - 1 do
  begin
    if ignoreAttr[i] then continue;
    // A separator is needed only once something has been written.
    if Result <> '' then Result := Result + ', ';
    if ( isMissing( i ) ) then Result := Result + '?'
    else
    if ( m_Dataset = nil ) then Result := Result + (FloatToStr( m_AttValues[ i ] ))
    else
    begin
      atype := attribute( i ).attributeType;
      if ( atype = ATTRIBUTE_TYPE_NOMINAL ) or
         ( atype = ATTRIBUTE_TYPE_STRING ) then
        Result := Result + Utils.simpleQuote( stringValue( i ) )
      else
      if ( atype = ATTRIBUTE_TYPE_NUMERIC ) then
        Result := Result + FloatToStrF( m_AttValues[ i ],ffFixed,5,2);
    end;
  end;
end;

{ Creates an enumeration over the attribute vector of dataset, positioned at
  the first element and with no special element (-1). }
constructor TDMAttributeEnumeration.Create( dataset : TDMInstances );
begin
  m_FastVector := dataset.m_Attributes;
  m_SpecialElement := -1;
  m_Counter := 0;
end;

{ Tells whether the current position is still inside the vector. }
function TDMAttributeEnumeration.hasMoreElements : Boolean;
begin
  Result := ( m_Counter < m_FastVector.size );
end;

{ Returns the attribute at the current position and advances. When the new
  position equals the special element, that position is skipped as well. }
function TDMAttributeEnumeration.nextElement : TDMAttribute;
begin
  Result := m_FastVector.elementAt( m_Counter ) as TDMAttribute;
  Inc( m_Counter );
  if m_Counter = m_SpecialElement then
    Inc( m_Counter );
end;

{ Creates an enumeration over the instance vector of dataset, positioned at
  the first element and with no special element (-1). }
constructor TDMInstanceEnumeration.Create( dataset : TDMInstances );
begin
  m_FastVector := dataset.m_Instances;
  m_SpecialElement := -1;
  m_Counter := 0;
end;

{ Tells whether the current position is still inside the vector. }
function TDMInstanceEnumeration.hasMoreElements : Boolean;
begin
  Result := ( m_Counter < m_FastVector.size );
end;

{ Returns the instance at the current position and advances. When the new
  position equals the special element, that position is skipped as well. }
function TDMInstanceEnumeration.nextElement : TDMInstance;
begin
  Result := m_FastVector.elementAt( m_Counter ) as TDMInstance;
  Inc( m_Counter );
  if m_Counter = m_SpecialElement then
    Inc( m_Counter );
end;



end.
Соседние файлы в папке DMCore