Скачиваний:
28
Добавлен:
01.05.2014
Размер:
8.77 Кб
Скачать
unit StreamTokenizer;

interface
uses
Classes,
DmmConstants,
Exceptions,
SysUtils;

const
// Character-class flags stored per character code in TDMStreamTokenizer.ctype.
// The values are distinct bits, so they can be combined with "or".
CT_WHITESPACE = 1;
CT_DIGIT = 2;
CT_ALPHA = 4;
CT_QUOTE = 8;
CT_COMMENT = 16;

// Token types reported through TDMStreamTokenizer.ttype / nextToken().
// They are negative; nextToken() also stores plain character codes in ttype
// for ordinary and quote characters.

// end of stream
TT_EOF = -1;
// end of line
TT_EOL = -2;
// a numeric token was read
TT_NUMBER = -3;
// a word token was read
TT_WORD = -4;
// initial state (no token read yet)
TT_NOTHING = -5;

var
// Index of the next byte to be read from the tokenizer buffer.
// NOTE(review): declared at unit level, so it is shared by every
// TDMStreamTokenizer instance; the constructor resets it to 0.
curPosition : integer;
// line number of the last token read (unit-level as well; the constructor resets it to 1)
LINENO : integer;

type
// Tokenizer over an in-memory copy of a stream. Each character code 0..255
// has a class (CT_* flags) in ctype; nextToken() uses these classes to split
// the input into words, numbers, quoted strings, end-of-line markers and
// ordinary characters.
TDMStreamTokenizer = class
private
  // buffer holding the bytes read from the stream in Create(r : TStream)
  reader : PChar;
  // number of bytes available in reader
  size : integer;

  // character classes (CT_* flags), indexed by character code
  ctype : array of integer;

public
  // type of the token read last: a TT_* constant, or the character code
  // for ordinary and quote characters
  ttype : integer;

  // holds the text when the token is a word or a quoted string
  sval : AnsiString;

  // holds the value when the token is a number
  nval : double;

private
  // sets up the default syntax table only; does not initialise the stream buffer
  constructor Create(); overload;

public
  // returns the code of the next buffered character, or -1 at the end of the buffer
  function read() : integer;

public
  // builds a tokenizer over the contents of r
  constructor Create (r : TStream); overload;
  procedure wordChars( low : integer; hi : integer);
  procedure whitespaceChars( low : integer; hi : integer);
  procedure commentChar(ch : integer);
  procedure quoteChar(ch : integer);
  procedure ordinaryChar(ch : integer);
  procedure parseNumbers();
  procedure resetSyntax();
  // "override" is required: TObject.Free calls the virtual TObject.Destroy,
  // and without it this destructor (which releases the buffer) is never
  // reached through Free.
  destructor Destroy(); override;
  function nextToken() : integer;
  function toString() : AnsiString;
  function getReader():PChar;
  procedure pushFront();
end;

implementation
// Parameterless constructor: resets the shared read position and line
// counter and installs the default syntax table. It does not set up a buffer.
constructor TDMStreamTokenizer.Create();
begin
  // a new tokenizer starts at the first byte of the first line
  LINENO := 1;
  curPosition := 0;

  // no token has been read yet
  ttype := TT_NOTHING;

  // one class entry per character code, all cleared to "ordinary"
  SetLength(ctype, 256);
  resetSyntax();

  // default syntax: letters form words, codes 0..32 are whitespace,
  // '/' starts a comment, both quote characters delimit strings,
  // digits together with '.' and '-' form numbers
  wordChars(ord('a'), ord('z'));
  wordChars(ord('A'), ord('Z'));
  wordChars(128 + 32, 255);
  whitespaceChars(0, ord(' '));
  commentChar(ord('/'));
  quoteChar(ord('"'));
  quoteChar(ord(''''));
  parseNumbers();
end;

// Advances the shared read position by one character.
procedure TDMStreamTokenizer.pushFront();
begin
  curPosition := curPosition + 1;
end;

// Gives access to the raw character buffer the tokenizer reads from.
function TDMStreamTokenizer.getReader():PChar;
begin
  Result := Self.reader;
end;
// Returns the code of the character at the current position and moves the
// position forward by one. Returns -1 (without moving) once the position has
// reached the end of the buffer.
// Raises EIllegalStreamException when no buffer has been set up.
function TDMStreamTokenizer.read() : integer;
begin
  // no buffer at all: nothing can be read
  if (reader = nil) then
    raise EIllegalStreamException.Create('Поток не задан');

  // end of the buffer
  if (curPosition >= size) then
  begin
    Result := -1;
    Exit;
  end;

  Result := ord(reader[curPosition]);
  curPosition := curPosition + 1;
end;

// Builds a tokenizer over the contents of r: installs the default syntax
// table, then copies the bytes the stream delivers into an internal buffer.
//   r - source stream; reading starts at its current position.
// Raises EInvalidPointer when r is nil. Any exception raised while reading
// the stream is propagated after the buffer has been released.
constructor TDMStreamTokenizer.Create (r : TStream);
begin
  self.Create();
  if (r = nil) then raise EInvalidPointer.Create('Нулевой указатель');
  size := r.Size;
  GetMem(reader, size);
  try
    // Read may deliver fewer bytes than requested; keep only what was
    // actually read so that uninitialised memory is never tokenized.
    size := r.Read(reader[0], size);
  except
    // Release the buffer and clear the pointer so that neither read() nor
    // the destructor touches freed memory, then report the failure instead
    // of returning a tokenizer with a dangling buffer.
    FreeMem(reader);
    reader := nil;
    size := 0;
    raise;
  end;
end;

// Releases the character buffer and then runs the inherited destructor.
destructor TDMStreamTokenizer.Destroy ();
begin
  // the buffer may be absent (no stream was ever read)
  if (reader <> nil) then
  begin
    FreeMem(reader);
    // clear the pointer so that a repeated call cannot free it twice
    reader := nil;
  end;
  inherited Destroy;
end;

// Adds the CT_ALPHA flag to every character code in low..hi.
// The range is clipped to the bounds of the syntax table; existing flags
// of the affected characters are kept.
procedure TDMStreamTokenizer.wordChars(low : integer; hi : integer);
var
  first : integer;
  last : integer;
  code : integer;
begin
  // clip the lower bound
  first := low;
  if (first < 0) then first := 0;

  // clip the upper bound
  last := hi;
  if (last >= length(ctype)) then last := length(ctype) - 1;

  for code := first to last do
    ctype[code] := (ctype[code] or CT_ALPHA);
end;

// Marks every character code in low..hi as whitespace.
// The range is clipped to the bounds of the syntax table; any previous
// class of the affected characters is overwritten.
procedure TDMStreamTokenizer.whitespaceChars(low : integer; hi : integer);
var
  first : integer;
  last : integer;
  code : integer;
begin
  // clip the lower bound
  first := low;
  if (first < 0) then first := 0;

  // clip the upper bound
  last := hi;
  if (last >= length(ctype)) then last := length(ctype) - 1;

  for code := first to last do
    ctype[code] := CT_WHITESPACE;
end;

// Makes ch the comment-start character, replacing its previous class.
// Codes outside the syntax table are ignored.
procedure TDMStreamTokenizer.commentChar(ch : integer);
begin
  if (ch < 0) or (ch >= length(ctype)) then Exit;
  ctype[ch] := CT_COMMENT;
end;

// Makes ch a quote character, replacing its previous class.
// Codes outside the syntax table are ignored.
procedure TDMStreamTokenizer.quoteChar(ch : integer);
begin
  if (ch < 0) or (ch >= length(ctype)) then Exit;
  ctype[ch] := CT_QUOTE;
end;

// Makes ch an ordinary character by clearing all of its class flags.
// Codes outside the syntax table are ignored.
procedure TDMStreamTokenizer.ordinaryChar(ch : integer);
begin
  if (ch < 0) or (ch >= length(ctype)) then Exit;
  ctype[ch] := 0;
end;

// Adds the CT_DIGIT flag to the characters '0'..'9', '.' and '-',
// keeping whatever flags they already carry.
procedure TDMStreamTokenizer.parseNumbers();
var
  code : integer;
begin
  for code := ord('0') to ord('9') do
    ctype[code] := ctype[code] or CT_DIGIT;

  // sign and decimal point take part in numbers as well
  ctype[ord('-')] := ctype[ord('-')] or CT_DIGIT;
  ctype[ord('.')] := ctype[ord('.')] or CT_DIGIT;
end;

// Clears the whole syntax table: every character becomes ordinary (class 0).
procedure TDMStreamTokenizer.resetSyntax();
var
  code : integer;
begin
  for code := 0 to length(ctype) - 1 do
    ctype[code] := 0;
end;

// Reads the next token from the buffer and returns its type (also stored in
// ttype):
//   TT_EOF    - the end of the buffer was reached;
//   TT_EOL    - a line feed (code 10) was read; LINENO is incremented;
//   TT_NUMBER - a number of the form [-]digits[.digits]; its value is in nval;
//   TT_WORD   - a run of CT_ALPHA characters; its text is in sval;
//   a quote character code - a quoted string; its text is in sval;
//   any other character code - an ordinary character.
// Whitespace other than the line feed is skipped. A comment is skipped up to
// and including the line feed that ends it, so no TT_EOL is reported for it.
// Raises EInputFileException for a malformed number (no digits, a stray '-'
// or '.', or a number not followed by whitespace or the end of the input) and
// for a quoted string that is unterminated or contains a non-word character.
// The bounds checks below rely on short-circuit boolean evaluation (the
// compiler default).
function TDMStreamTokenizer.nextToken() : integer;
var
  c : integer;
  negative : boolean;
  digits : integer;
  fraction : double;
  divisor : double;

  // True when ch is a decimal digit; the end-of-input code -1 never is.
  function isDecimalDigit(ch : integer) : boolean;
  begin
    result := (ch >= ord('0')) and (ch <= ord('9'));
  end;

begin
  ttype := TT_NOTHING;
  c := read();
  sval := '?';

  // Skip comments and whitespace. The line feed is whitespace too, but it is
  // reported as TT_EOL, so it stops the loop. "c >= 0" comes first so that
  // the end-of-input code -1 is never used as an index into ctype.
  while (c >= 0) and (c <> 10)
    and ((ctype[c] = CT_COMMENT) or (ctype[c] = CT_WHITESPACE)) do
  begin
    if (ctype[c] = CT_COMMENT) then
    begin
      // comment: drop everything up to the end of the line
      while ((c <> 10) and (c >= 0)) do c := read();
      // count the line only when the comment really ended with a line feed
      if (c = 10) then inc(LINENO);
      c := read();
    end
    else
      // whitespace
      c := read();
  end;

  // end of input
  if (c < 0) then
  begin
    ttype := TT_EOF;
    result := ttype;
    Exit;
  end;

  // end of line
  if (c = 10) then
  begin
    ttype := TT_EOL;
    result := ttype;
    inc(LINENO);
    Exit;
  end;

  case ctype[c] of
    // number
    CT_DIGIT :
      begin
        ttype := TT_NUMBER;
        nval := 0;
        digits := 0;

        // optional leading sign
        negative := (c = ord('-'));
        if negative then c := read();

        // integer part
        while isDecimalDigit(c) do
        begin
          nval := nval * 10 + (c - ord('0'));
          inc(digits);
          c := read();
        end;

        // fractional part: accumulate the digits as a whole number and
        // divide once by the matching power of ten
        if (c = ord('.')) then
        begin
          fraction := 0;
          divisor := 1;
          c := read();
          while isDecimalDigit(c) do
          begin
            fraction := fraction * 10 + (c - ord('0'));
            divisor := divisor * 10;
            inc(digits);
            c := read();
          end;
          nval := nval + fraction / divisor;
        end;

        // A number needs at least one digit and must be followed by
        // whitespace or the end of the input; this also rejects a second
        // '-' or '.' inside the number.
        if (digits = 0) or ((c >= 0) and (ctype[c] <> CT_WHITESPACE)) then
          raise EInputFileException.Create('Ошибка в записи числа, строка ' + IntToStr(LINENO));

        if negative then nval := -nval;
      end;

    // word
    CT_ALPHA :
      begin
        ttype := TT_WORD;
        sval := '';
        while (c >= 0) and (ctype[c] = CT_ALPHA) do
        begin
          sval := concat(sval, chr(c));
          c := read();
        end;
      end;

    // quoted string
    CT_QUOTE :
      begin
        ttype := c;
        sval := '';
        c := read();
        while (c < 0) or (ctype[c] <> CT_QUOTE) do
        begin
          // the end of the input before the closing quote, or a character
          // that is not a word character, makes the string invalid
          if (c < 0) or (ctype[c] <> CT_ALPHA) then
            raise EInputFileException.Create('Ошибка в записи слова, строка ' + IntToStr(LINENO));
          sval := concat(sval, chr(c));
          c := read();
        end;
        // read the look-ahead character after the closing quote
        c := read();
      end;
  else
    // Ordinary character (class 0). A character that carries a combination
    // of class flags is handled the same way, so that it is consumed
    // instead of being pushed back and re-read forever.
    begin
      ttype := c;
      // the character itself is the token: step over it so that the
      // push-back below leaves it consumed
      pushFront();
    end;
  end;

  result := ttype;
  // push the look-ahead character back; at the end of the input read() did
  // not advance, so there is nothing to push back
  if (c <> -1) then
    dec(curPosition);
end;

// Returns a printable description of the current token in the form
// "Token[<text>], line <LINENO>". <text> is EOF, EOL, NOTHING, the word
// itself, "n=" followed by the number, or - for a character token - the
// quoted string or the character itself enclosed in apostrophes.
function TDMStreamTokenizer.toString() : AnsiString;
var
  ret : AnsiString;
  number : AnsiString;
begin
  case ttype of
    TT_EOF:
      ret := 'EOF';
    TT_EOL:
      ret := 'EOL';
    TT_WORD:
      ret := sval;
    TT_NUMBER:
      begin
        Str(nval, number);
        ret := 'n=' + number;
      end;
    TT_NOTHING:
      ret := 'NOTHING';
  else
    begin
      ret := '';
      // only a valid character code may be used as an index into ctype
      if ((ttype >= 0) and (ttype < 256)) then
      begin
        if (ctype[ttype] = CT_QUOTE) then
          // quoted string: show its contents
          ret := sval
        else
          // ordinary character: show the character itself
          ret := chr(ttype);
      end;

      ret := Concat('''', ret, '''');
    end;
  end;
  Result := 'Token[' + ret + '], line ' + IntToStr(LINENO);
end;

end.