unit CrawlProcs;

interface

uses
  Windows, Messages, SysUtils, mStrin32;

//  GUID: {51A35D60-2BFD-11D7-8FE1-00C00C702F1A  

const
  wm_Start = wm_user + 1;

  //Tasks
  wt_None       = 0;
  wt_Resolving  = 1;
  wt_Fetching   = 2;  

  //RuleKinds
  rk_MatchText          = 0;
  rk_MatchTextCS        = 1;

  //Rule results
  rk_Skip               = 0;
  rk_Get                = 1;

type
  TRuleOption =
    (ro_if, ro_CS, ro_URL, ro_Get, ro_Skip);

type
  TURL = record
    Link: Shortstring;
    Level: Integer;
    User: Shortstring;
    Pass: Shortstring;
  end;
  TRule = packed record
    Link: Shortstring;
    RuleKind: Integer;
    RuleKind2: Integer;
    Condition: Shortstring;
    Result1: Integer;
    Result2: Shortstring;
    Other: Array[1..220] of Byte;
  end;
  //This is for the Image Viewer
  PWebRe=^TWebRe;
  TWebRe = packed record
    FileName: string[200];
    WebSite: string[200];
  end;

function RulesAllow(Link: string; URL: string): Boolean;
function RulesEnforce(Link: string; URL: string): Boolean;
function StrToRule(T: string; var R: TRule): Boolean;
function StrToURL(S: string): TURL;
function URLToStr(T: TURL): string;

var
  //Messages and Pointers related to Image viewer
  wm_LoadPic, Mh: THandle;
  Mp: PWebRE;
  //To get WebCrawler to visit a URL, fill MP with
  //the URL then send wm_Control Message with return
  //handle in lParam, cm_Browse in wParam
  wm_Control: THandle;
  Uh: THandle;
  URLText: PChar;

const
  cm_Browse           = $1;
  cm_Clear            = $2;
  cm_Respond          = $3;
  cm_FinishedURL      = $4;
  cm_FinishedDL       = $5;

  URLTextLen          = 20000;

implementation

function RulesAllowEx(Init: Boolean; Link: string; URL: string): Boolean;
var
  X: Boolean;
  F: File of TRule;
  R: TRule;
begin
  X := Init;
  Link := LowerCase(Link);
  AssignFile(F, My_FileDirectory(ParamStr(0)) + 'rules.dat');
  {$I-}
  Reset(F);
  if IOResult<>0 then
    Rewrite(F);
  {$I+}
  while not eof(F) do
    begin
      Read(F, R);
      if AnsiPos(LowerCase(R.Link), Link) = 0 then
        Continue;
      if R.RuleKind = rk_MatchText then
        if AnsiPos(LowerCase(R.Condition), LowerCase(URL)) > 0 then
          X := R.Result1 = rk_Get;
      if R.RuleKind = rk_MatchTextCS then
        if AnsiPos(R.Condition, URL) > 0 then
          X := R.Result1 = rk_Get;
    end;
  CloseFile(F);
  Result := X;
end;

function RulesAllow(Link: string; URL: string): Boolean;
begin
  Result := RulesAllowEx(True, Link, URL);
end;

function RulesEnforce(Link: string; URL: string): Boolean;
begin
  Result := RulesAllowEx(False, Link, URL);
end;

function StrToRule(T: string; var R: TRule): Boolean;
var
  X: Boolean;
  I, J, K, L: Integer;
  D: array[1..100] of string[100];
  A: string;
  G: TRule;
  P: Set of TRuleOption;
begin
  X := True;
  G.Link := R.Link;
  G.RuleKind := 0;
  G.RuleKind2 := 0;
  G.Condition := '';
  G.Result1 := 0;
  G.Result2 := '';
  //tolkenize
  for I := 1 to 100 do D[I] := '';
  I := 1;
  J := 1;
  While J < Length(T) do
    begin
      if (T[J] = ' ') OR (T[J] = '=') OR  (T[J] = '"') then
        Inc(I)
      else
        D[I] := D[I] + T[J];
      Inc(J);
      if I > 100 then
        I := 100;
    end;
  P := [];

  //Check First word
  if LowerCase(D[1]) = 'if' then
    P := [ro_if]
  else
    X := False;

  //Look for the operator. Everything in between is the condition
  K := 1;
  for J := 2 to I do
    begin
      A := LowerCase(D[J]);
      if (A = 'in') OR (A = '=') then
        begin
          K := J;
          if LowerCase(D[K + 1]) = 'url' then
            P := P + [ro_URL];
          if AnsiPos('(case sensitive)', LowerCase(D[K - 1])) > 0 then
            P := P + [ro_CS];
          Break;
        end;
    end;
  if K = 1 then
    X := False;

  //Build a condition string
  A := '';
  for J := 2 to K - 1 do
    A := A + D[J];
  if A[1] = '"' then
    begin
      A := My_Substring(A, 2, -1);
      A := My_stringToEx(A, '"');
    end;
  G.Condition := A;

  //Locate the 'then'
  L := 1;
  for J := K + 1 to I do
    begin
      A := LowerCase(D[J]);
      if (A = 'then') then
        begin
          L := J;
          if LowerCase(D[L + 1]) = 'get' then
            P := P + [ro_GET];
          if LowerCase(D[L + 1]) = 'skip' then
            P := P + [ro_Skip];
          Break;
        end;
    end;
  if L = 1 then
    X := False;

  if ro_if in P then G.RuleKind := rk_MatchText;
  if [ro_if, ro_CS] * P = [ro_if, ro_CS] then G.RuleKind := rk_MatchText;
  if ro_Get in P then G.Result1 := rk_Get;
  if ro_Skip in P then G.Result1 := rk_Skip;

  if X then
    R := G;
  Result := X;
end;

function StrToURL(S: string): TURL;
var
  I: Integer;
  L: TURL;
begin
  L.Link := '';
  L.Level := 0;
  L.User := '';
  L.Pass := '';
  I := AnsiPos(#1, S);
  if I = 0 then
    begin
      L.Link := S;
      Result := L;
      Exit;
    end;
  L.Link := My_Substring(S, 1, I - 1);
  S := My_Substring(S, I + 1, -1);
  I := AnsiPos(#1, S);
  try
    L.Level := StrToInt(My_Substring(S, 1, I - 1));
  except
    On EConvertError do
      begin end;
  end;
  S := My_Substring(S, I + 1, -1);
  I := AnsiPos(#1, S);
  if I > 1 then
    L.User := My_Substring(S, 1, I - 1);
  S := My_Substring(S, I + 1, -1);
  L.Pass := S;
  Result := L;
end;

function URLToStr(T: TURL): string;
begin
  Result :=
    T.Link + #1 + IntToStr(T.Level) + #1 + T.User + #1 + T.Pass;
end;

initialization
  //Image Viewer Constants
  wm_LoadPic := RegisterWindowMessage('wm_LoadPic');
  wm_Control := RegisterWindowMessage('wm_Control');
  Mh := CreateFileMapping($FFFFFFFF, nil, Page_ReadWrite,
    0, $FFF, 'WebRE');
  Mp := MapViewofFile(Mh, File_Map_All_Access, 0, 0, 0);
  Uh := CreateFileMapping($FFFFFFFF, nil, Page_ReadWrite,
    0, URLTextLen, 'URLText');
  URLText := MapViewofFile(Uh, File_Map_All_Access, 0, 0, 0);
finalization
  UnMapViewOfFile(Mp);
  CloseHandle(Mh);
  UnMapViewOfFile(URLText);
  CloseHandle(Uh);
end.
