pcregexp.pas 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845
  1. {
  2. pcRegExp - Perl compatible regular expressions for Virtual Pascal
  3. (c) 2001 Peter S. Voronov aka Chem O'Dun <petervrn@yahoo.com>
  4. Based on PCRE library interface unit for Virtual Pascal.
  5. (c) 2001 Alexander Tokarev <dwalin@dwalin.ru>
  6. The current PCRE version is: 3.7
  7. This software may be distributed under the terms of the modified BSD license
  8. Copyright (c) 2001, Alexander Tokarev
  9. All rights reserved.
  10. Redistribution and use in source and binary forms, with or without
  11. modification, are permitted provided that the following conditions are met:
  12. * Redistributions of source code must retain the above copyright notice,
  13. this list of conditions and the following disclaimer.
  14. * Redistributions in binary form must reproduce the above copyright notice,
  15. this list of conditions and the following disclaimer in the documentation
  16. and/or other materials provided with the distribution.
  17. * Neither the name of the <ORGANIZATION> nor the names of its contributors
  18. may be used to endorse or promote products derived from this software without
  19. specific prior written permission.
  20. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  21. ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  22. WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23. DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  24. FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  26. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  27. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  28. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. The PCRE library is written by: Philip Hazel <ph10@cam.ac.uk>
  31. Copyright (c) 1997-2004 University of Cambridge
  32. AngelsHolocaust 4-11-04 updated to use version v5.0
  33. (INFO: this is regex-directed, NFA)
  34. AH: 9-11-04 - pcre_free: removed var, pcre already gives the ptr, now
  35. everything works as it should (no more crashes)
  36. -> removed CheckRegExp because pcre handles errors perfectly
  37. 10-11-04 - added pcError (errorhandling), pcInit
  38. 13-11-04 - removed the ErrorPos = 0 check -> always print erroroffset
  39. 17-10-05 - support for \1-\9 backreferences in TpcRegExp.GetReplStr
  40. 17-02-06 - added RunTimeOptions: caller can set options while searching
  41. 19-02-06 - added SearchOfs(): let PCRE use the complete string and offset
  42. into the string itself
  43. 20-12-06 - support for version 7.0
  44. 27.08.08 - support for v7.7
  45. }
  46. {$H+} {$DEFINE PCRE_3_7} {$DEFINE PCRE_5_0} {$DEFINE PCRE_7_0} {$DEFINE PCRE_7_7}
  47. Unit pcregexp;
  48. Interface
  49. uses objects;
  Type
    PpcRegExp = ^TpcRegExp;

    { Wrapper object around one compiled PCRE pattern.
      Field order and virtual method order are part of the binary layout
      and must not be changed. }
    TpcRegExp = object(TObject)
      MatchesCount   : integer;     { last value returned by pcre_exec }
      RegExpC,                      { compiled pattern (pcre_compile result) }
      RegExpExt      : Pointer;     { study data (pcre_study result), may be nil }
      Matches        : Pointer;     { offset vector, allocated as TMatchArray }
      RegExp         : shortstring; { pattern text as passed to Init }
      SourceLen      : integer;     { subject length stored by Search/SearchOfs }
      PartialMatch   : boolean;     { last exec returned PCRE_ERROR_PARTIAL }
      Error          : boolean;     { true unless Init completed successfully }
      ErrorMsg       : Pchar;       { message set by pcre_compile/pcre_study }
      ErrorPos       : integer;     { pattern offset reported by pcre_compile }
      RunTimeOptions : Integer;     // options which can be set by the caller

      constructor Init(const ARegExp : shortstring; AOptions : integer; ALocale : Pointer);

      { searching }
      function Search(AStr: Pchar; ALen : longint) : boolean; virtual;
      function SearchNext( AStr: Pchar; ALen : longint) : boolean; virtual;
      function SearchOfs ( AStr: Pchar; ALen, AOfs : longint) : boolean; virtual;

      { access to the offsets of the last match }
      function MatchSub(ANom: integer; var Pos, Len : longint) : boolean; virtual;
      function MatchFull(var Pos, Len : longint) : boolean; virtual;

      { access to the text of the last match }
      function GetSubStr(ANom: integer; AStr: Pchar) : string; virtual;
      function GetFullStr(AStr: Pchar) : string; virtual;
      function GetReplStr(AStr: Pchar; const ARepl: string) : string; virtual;
      function GetPreSubStr(AStr: Pchar) : string; virtual;
      function GetPostSubStr(AStr: Pchar) : string; virtual;

      function ErrorStr : string; virtual;
      destructor Done; virtual;
    end;
  { One-shot helpers: build a temporary TpcRegExp, use it once, dispose it. }
  function pcGrepMatch(WildCard, aStr: string; AOptions: integer; ALocale: Pointer): Boolean;
  function pcGrepSub(WildCard, aStr, aRepl: string; AOptions: integer; ALocale: Pointer): string;

  { Same operations, but the compiled pattern is taken from the unit's cache. }
  function pcFastGrepMatch(WildCard, aStr: string): Boolean;
  function pcFastGrepSub(WildCard, aStr, aRepl: string): string;

  {$IFDEF PCRE_5_0}
  { Version string of the linked PCRE library. }
  function pcGetVersion : pchar;
  {$ENDIF}

  { pcError reports a compile error of pRegExp^ (a PpcRegExp) and disposes it;
    pcInit creates a new TpcRegExp for Pattern. }
  function pcError (var pRegExp : Pointer) : Boolean;
  function pcInit (const Pattern: Shortstring; CaseSens: Boolean) : Pointer;
  Const { Options }
    PCRE_CASELESS          = $0001;
    PCRE_MULTILINE         = $0002;
    PCRE_DOTALL            = $0004;
    PCRE_EXTENDED          = $0008;
    PCRE_ANCHORED          = $0010;
    PCRE_DOLLAR_ENDONLY    = $0020;
    PCRE_EXTRA             = $0040;
    PCRE_NOTBOL            = $0080;
    PCRE_NOTEOL            = $0100;
    PCRE_UNGREEDY          = $0200;
    PCRE_NOTEMPTY          = $0400;
  {$IFDEF PCRE_5_0}
    PCRE_UTF8              = $0800;
    PCRE_NO_AUTO_CAPTURE   = $1000;
    PCRE_NO_UTF8_CHECK     = $2000;
    PCRE_AUTO_CALLOUT      = $4000;
    PCRE_PARTIAL           = $8000;
  {$ENDIF}
  {$IFDEF PCRE_7_0}
    PCRE_DFA_SHORTEST      = $00010000;
    PCRE_DFA_RESTART       = $00020000;
    PCRE_FIRSTLINE         = $00040000;
    PCRE_DUPNAMES          = $00080000;
    PCRE_NEWLINE_CR        = $00100000;
    PCRE_NEWLINE_LF        = $00200000;
    PCRE_NEWLINE_CRLF      = $00300000;
    PCRE_NEWLINE_ANY       = $00400000;
    PCRE_NEWLINE_ANYCRLF   = $00500000;
    { mask covering every bit used by the PCRE_NEWLINE_xxx values }
    PCRE_NEWLINE_BITS      = PCRE_NEWLINE_CR or PCRE_NEWLINE_LF or PCRE_NEWLINE_ANY;
  {$ENDIF}
  {$IFDEF PCRE_7_7}
    PCRE_BSR_ANYCRLF       = $00800000;
    PCRE_BSR_UNICODE       = $01000000;
    PCRE_JAVASCRIPT_COMPAT = $02000000;
  {$ENDIF}

    { Masks of the option bits accepted by each entry point. All members are
      disjoint bit sets, so combining them with "or" yields the same value as
      adding them. }
    PCRE_COMPILE_ALLOWED_OPTIONS =
        PCRE_ANCHORED or PCRE_AUTO_CALLOUT or PCRE_CASELESS
        or PCRE_DOLLAR_ENDONLY or PCRE_DOTALL or PCRE_EXTENDED
        or PCRE_EXTRA or PCRE_MULTILINE or PCRE_NO_AUTO_CAPTURE
        or PCRE_UNGREEDY or PCRE_UTF8 or PCRE_NO_UTF8_CHECK
  {$IFDEF PCRE_7_0}
        or PCRE_DUPNAMES or PCRE_FIRSTLINE or PCRE_NEWLINE_BITS
  {$ENDIF}
  {$IFDEF PCRE_7_7}
        or PCRE_BSR_ANYCRLF or PCRE_BSR_UNICODE or PCRE_JAVASCRIPT_COMPAT
  {$ENDIF}
        ;

    PCRE_EXEC_ALLOWED_OPTIONS =
        PCRE_ANCHORED or PCRE_NOTBOL or PCRE_NOTEOL
        or PCRE_NOTEMPTY or PCRE_NO_UTF8_CHECK or PCRE_PARTIAL
  {$IFDEF PCRE_7_0}
        or PCRE_NEWLINE_BITS
  {$ENDIF}
  {$IFDEF PCRE_7_7}
        or PCRE_BSR_ANYCRLF or PCRE_BSR_UNICODE
  {$ENDIF}
        ;

  {$IFDEF PCRE_7_0}
    PCRE_DFA_EXEC_ALLOWED_OPTIONS =
        PCRE_ANCHORED or PCRE_NOTBOL or PCRE_NOTEOL
        or PCRE_NOTEMPTY or PCRE_NO_UTF8_CHECK or PCRE_PARTIAL
        or PCRE_DFA_SHORTEST or PCRE_DFA_RESTART
        or PCRE_NEWLINE_BITS
  {$IFDEF PCRE_7_7}
        or PCRE_BSR_ANYCRLF or PCRE_BSR_UNICODE
  {$ENDIF}
        ;
  {$ENDIF}
  153. { Exec-time and get/set-time error codes }
  154. PCRE_ERROR_NOMATCH = -1;
  155. PCRE_ERROR_NULL = -2;
  156. PCRE_ERROR_BADOPTION = -3;
  157. PCRE_ERROR_BADMAGIC = -4;
  158. PCRE_ERROR_UNKNOWN_MODE = -5;
  159. PCRE_ERROR_NOMEMORY = -6;
  160. PCRE_ERROR_NOSUBSTRING = -7;
  161. {$IFDEF PCRE_5_0}
  162. PCRE_ERROR_MATCHLIMIT = -8;
  163. PCRE_ERROR_CALLOUT = -9; { Never used by PCRE itself }
  164. PCRE_ERROR_BADUTF8 = -10;
  165. PCRE_ERROR_BADUTF8_OFFSET = -11;
  166. PCRE_ERROR_PARTIAL = -12;
  167. PCRE_ERROR_BADPARTIAL = -13;
  168. PCRE_ERROR_INTERNAL = -14;
  169. PCRE_ERROR_BADCOUNT = -15;
  170. {$ENDIF}
  171. {$IFDEF PCRE_7_0}
  172. PCRE_ERROR_DFA_UITEM = -16;
  173. PCRE_ERROR_DFA_UCOND = -17;
  174. PCRE_ERROR_DFA_UMLIMIT = -18;
  175. PCRE_ERROR_DFA_WSSIZE = -19;
  176. PCRE_ERROR_DFA_RECURSE = -20;
  177. PCRE_ERROR_RECURSIONLIMIT = -21;
  178. PCRE_ERROR_NULLWSLIMIT = -22;
  179. PCRE_ERROR_BADNEWLINE = -23;
  180. {$ENDIF}
  181. { Request types for pcre_fullinfo() }
  182. PCRE_INFO_OPTIONS = 0;
  183. PCRE_INFO_SIZE = 1;
  184. PCRE_INFO_CAPTURECOUNT = 2;
  185. PCRE_INFO_BACKREFMAX = 3;
  186. PCRE_INFO_FIRSTBYTE = 4;
  187. PCRE_INFO_FIRSTCHAR = 4; { For backwards compatibility }
  188. PCRE_INFO_FIRSTTABLE = 5;
  189. {$IFDEF PCRE_5_0}
  190. PCRE_INFO_LASTLITERAL = 6;
  191. PCRE_INFO_NAMEENTRYSIZE = 7;
  192. PCRE_INFO_NAMECOUNT = 8;
  193. PCRE_INFO_NAMETABLE = 9;
  194. PCRE_INFO_STUDYSIZE = 10;
  195. PCRE_INFO_DEFAULT_TABLES = 11;
  196. {$ENDIF PCRE_5_0}
  197. {$IFDEF PCRE_7_7}
  198. PCRE_INFO_OKPARTIAL = 12;
  199. PCRE_INFO_JCHANGED = 13;
  200. PCRE_INFO_HASCRORLF = 14;
  201. {$ENDIF}
  202. { Request types for pcre_config() }
  203. {$IFDEF PCRE_5_0}
  204. PCRE_CONFIG_UTF8 = 0;
  205. PCRE_CONFIG_NEWLINE = 1;
  206. PCRE_CONFIG_LINK_SIZE = 2;
  207. PCRE_CONFIG_POSIX_MALLOC_THRESHOLD = 3;
  208. PCRE_CONFIG_MATCH_LIMIT = 4;
  209. PCRE_CONFIG_STACKRECURSE = 5;
  210. PCRE_CONFIG_UNICODE_PROPERTIES = 6;
  211. {$ENDIF PCRE_5_0}
  212. {$IFDEF PCRE_7_0}
  213. PCRE_CONFIG_MATCH_LIMIT_RECURSION = 7;
  214. {$ENDIF}
  215. {$IFDEF PCRE_7_7}
  216. PCRE_CONFIG_BSR = 8;
  217. {$ENDIF}
  218. { Bit flags for the pcre_extra structure }
  219. {$IFDEF PCRE_5_0}
  220. PCRE_EXTRA_STUDY_DATA = $0001;
  221. PCRE_EXTRA_MATCH_LIMIT = $0002;
  222. PCRE_EXTRA_CALLOUT_DATA = $0004;
  223. PCRE_EXTRA_TABLES = $0008;
  224. {$ENDIF PCRE_5_0}
  225. {$IFDEF PCRE_7_0}
  226. PCRE_EXTRA_MATCH_LIMIT_RECURSION = $0010;
  227. {$ENDIF}
  228. Const
  229. // DefaultOptions : integer = 0;
  230. DefaultLocaleTable : pointer = nil;
  {$IFDEF PCRE_5_0}
  { The structure for passing additional data to pcre_exec(). This is defined
    in such a way as to be extensible. Always add new fields at the end, in
    order to remain compatible. }
  type
    ppcre_extra = ^tpcre_extra;
    tpcre_extra = record
      flags                 : longint; { Bits for which fields are set }
      study_data            : pointer; { Opaque data from pcre_study() }
      match_limit           : longint; { Maximum number of calls to match() }
      callout_data          : pointer; { Data passed back in callouts }
      tables                : pointer; { Pointer to character tables }
      match_limit_recursion : longint; { Max recursive calls to match() }
    end;

  type
    ppcre_callout_block = ^pcre_callout_block;
    pcre_callout_block = record
      version          : integer;
      (* ------------------------ Version 0 ------------------------------- *)
      callout_number   : integer;
      offset_vector    : pointer;
      subject          : pchar;
      subject_length   : integer;
      start_match      : integer;
      current_position : integer;
      capture_top      : integer;
      capture_last     : integer;
      callout_data     : pointer;
      (* ------------------- Added for Version 1 -------------------------- *)
      pattern_position : integer;
      next_item_length : integer;
    end;
  {$ENDIF PCRE_5_0}
  {$OrgName+}
  {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL}

  { Local replacements of the external pcre memory management functions. }
  function  pcre_malloc( size : integer ) : pointer;
  procedure pcre_free( {var} p : pointer );

  {$IFDEF PCRE_5_0}
  { Typed constants initialised with the two routines declared above. }
  const
    pcre_stack_malloc : function ( size : integer ): pointer = pcre_malloc;
    pcre_stack_free   : procedure ( {var} p : pointer ) = pcre_free;

  function pcre_callout(var p : ppcre_callout_block) : integer;
  {$ENDIF PCRE_5_0}

  {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL}
  269. Implementation
  270. Uses strings, collect, messages, dnapp, commands, advance0, stringsx
  271. {$IFDEF VIRTUALPASCAL} ,vpsyslow {$ENDIF VIRTUALPASCAL};
  Const
    MAGIC_NUMBER = $50435245; { 'PCRE' }
    MAX_MATCHES  = 90;        { changed in 3.5 version; should be divisible by 3, was 64}

  Type
    { Offset vector handed to pcre_exec. }
    PMatchArray = ^TMatchArray;
    TMatchArray = array[0..( MAX_MATCHES * 3 )] of integer;

    { Sorted collection of compiled expressions used as a cache.
      Compare works in two modes, selected by CompareModeInsert:
      item against item, or item against SearchRegExp. }
    PRegExpCollection = ^TRegExpCollection;
    TRegExpCollection = object(TSortedCollection)
      MaxRegExp         : integer;     { item count at which CheckNew evicts }
      SearchRegExp      : shortstring; { key used while Find is running }
      CompareModeInsert : boolean;     { false only while Find is running }
      constructor Init(AMaxRegExp:integer);
      procedure FreeItem(P: Pointer); virtual;
      function Compare(P1, P2: Pointer): Integer; virtual;
      function Find(ARegExp:shortstring;var P: PpcRegExp):boolean; virtual;
      function CheckNew(ARegExp:shortstring):PpcRegExp;virtual;
    end;

  Var
    { Cache instance, created in the unit initialization. }
    PRegExpCache : PRegExpCollection;
  {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL}

  { imported original pcre functions }

  function pcre_compile( const pattern   : PChar;
                         options         : integer;
                         var errorptr    : PChar;
                         var erroroffset : integer;
                         const tables    : PChar ) : pointer {pcre}; external;

  {$IFDEF PCRE_7_0}
  function pcre_compile2( const pattern    : PChar;
                          options          : integer;
                          var errorcodeptr : Integer;
                          var errorptr     : PChar;
                          var erroroffset  : integer;
                          const tables     : PChar ) : pointer {pcre}; external;
  {$ENDIF}
  {$IFDEF PCRE_5_0}
  function pcre_config( what : integer; where : pointer) : integer; external;

  { NOTE: in the C API "buffer" is a plain "char *" that receives the copied
    text. It is therefore passed by value here; a "var buffer : pchar" would
    hand PCRE the address of the pointer variable (a "char **") instead of
    the buffer itself. }
  function pcre_copy_named_substring( const code : pointer {pcre};
                                      const subject : pchar;
                                      var ovector : integer;
                                      stringcount : integer;
                                      const stringname : pchar;
                                      buffer : pchar;
                                      size : integer) : integer; external;

  function pcre_copy_substring( const subject : pchar;
                                var ovector : integer;
                                stringcount, stringnumber : integer;
                                buffer : pchar;
                                size : integer ) : integer; external;

  { The first two parameter names differ between library versions; the rest
    of the pcre_exec declaration is shared. }
  function pcre_exec( const argument_re : pointer {pcre};
                      const extra_data : pointer {pcre_extra};
  {$ELSE}
  function pcre_exec( const external_re : pointer;
                      const external_extra : pointer;
  {$ENDIF}
                      const subject : PChar;
                      length, start_offset, options : integer;
                      offsets : pointer;
                      offsetcount : integer ) : integer; external;
  {$IFDEF PCRE_7_0}
  function pcre_dfa_exec( const argument_re : pointer {pcre};
                          const extra_data  : pointer {pcre_extra};
                          const subject     : pchar;
                          length,
                          start_offset,
                          options           : integer;
                          offsets           : pointer;
                          offsetcount       : integer;
                          workspace         : pointer;
                          wscount           : integer ) : integer; external;
  {$ENDIF}
  {$IFDEF PCRE_5_0}
  procedure pcre_free_substring( const p : pchar ); external;
  procedure pcre_free_substring_list( var p : pchar ); external;

  function pcre_fullinfo( const argument_re : pointer {pcre};
                          const extra_data  : pointer {pcre_extra};
                          what              : integer;
                          where             : pointer ) : integer; external;

  function pcre_get_named_substring( const code       : pointer {pcre};
                                     const subject    : pchar;
                                     var ovector      : integer;
                                     stringcount      : integer;
                                     const stringname : pchar;
                                     var stringptr    : pchar ) : integer; external;

  function pcre_get_stringnumber( const code       : pointer {pcre};
                                  const stringname : pchar ) : integer; external;

  function pcre_get_stringtable_entries( const code       : pointer {pcre};
                                         const stringname : pchar;
                                         var firstptr,
                                             lastptr      : pchar ) : integer; external;

  function pcre_get_substring( const subject : pchar;
                               var ovector   : integer;
                               stringcount,
                               stringnumber  : integer;
                               var stringptr : pchar ) : integer; external;

  function pcre_get_substring_list( const subject : pchar;
                                    var ovector   : integer;
                                    stringcount   : integer;
                                    listptr       : pointer {const char ***listptr}) : integer; external;

  function pcre_info( const argument_re : pointer {pcre};
                      var optptr        : integer;
                      var first_byte    : integer ) : integer; external;

  function pcre_maketables : pchar; external;
  {$ENDIF}
  {$IFDEF PCRE_7_0}
  { pcre_refcount() returns the updated reference count as a C "int",
    not a string pointer. }
  function pcre_refcount( const argument_re : pointer {pcre};
                          adjust : integer ) : integer; external;
  {$ENDIF}

  { errorptr is set by the library; TpcRegExp.Init treats a nil result with a
    non-nil errorptr as a failure. }
  function pcre_study( const external_re : pointer {pcre};
                       options : integer;
                       var errorptr : PChar ) : pointer {pcre_extra}; external;

  {$IFDEF PCRE_5_0}
  function pcre_version : pchar; external;
  {$ENDIF}
  { Allocation hook: returns a block of "size" bytes obtained with GetMem. }
  function pcre_malloc( size : integer ) : pointer;
  var
    Block : pointer;
  begin
    GetMem( Block, size );
    pcre_malloc := Block;
  end;
  { Release hook: frees p with FreeMem; a nil pointer is ignored.
    p is passed by value, so the caller's variable is not cleared. }
  procedure pcre_free( {var} p : pointer );
  begin
    if p = nil then
      exit;
    FreeMem( p, 0 );
    {@p := nil;}
  end;
  {$IFDEF PCRE_5_0}
  (* Called from PCRE as a result of the (?C) item.
     This unit does not use callouts, so the handler does nothing and always
     yields zero. The result must be assigned explicitly: leaving it
     undefined would hand PCRE an arbitrary value. *)
  function pcre_callout;
  begin
    Result := 0;
  end;
  {$ENDIF}
  393. {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL}
  394. // Always include the newest version of the library
  395. {$IFDEF PCRE_7_7}
  396. {$L pcre77.lib}
  397. {$ELSE}
  398. {$IFDEF PCRE_7_0}
  399. {$L pcre70.lib}
  400. {$ELSE}
  401. {$IFDEF PCRE_5_0}
  402. {$L pcre50.lib}
  403. {$ELSE}
  404. {$IFDEF PCRE_3_7}
  405. {$L pcre37.lib}
  406. {$ENDIF PCRE_3_7}
  407. {$ENDIF PCRE_5_0}
  408. {$ENDIF PCRE_7_0}
  409. {$ENDIF PCRE_7_7}
  410. {TpcRegExp}
  { Compiles and studies ARegExp.
      ARegExp  - pattern text
      AOptions - compile options; bits outside PCRE_COMPILE_ALLOWED_OPTIONS
                 are masked off
      ALocale  - character tables passed to pcre_compile (may be nil)
    On failure the object is still constructed, but Error stays true and
    ErrorMsg/ErrorPos hold what the library reported. }
  constructor TpcRegExp.Init(const ARegExp:shortstring; AOptions:integer; ALocale : Pointer);
  var
    pRegExp  : PChar;
    PatSize  : integer;
    HeapCopy : boolean;
  begin
    RegExp:=ARegExp;
    RegExpC:=nil;
    RegExpExt:=nil;
    Matches:=nil;
    MatchesCount:=0;
    SourceLen:=0;
    PartialMatch:=false;
    Error:=true;
    ErrorMsg:=nil;
    ErrorPos:=0;
    RunTimeOptions := 0;

    { pcre_compile needs a zero-terminated pattern. A shortstring shorter than
      255 characters has room for the terminator in place; a full one must be
      copied to the heap. }
    PatSize:=length(RegExp)+1;
    HeapCopy:=length(RegExp) >= 255;
    if HeapCopy then
    begin
      GetMem(pRegExp,PatSize);
      StrPCopy(pRegExp,RegExp);
    end
    else
    begin
      RegExp[PatSize]:=#0;
      pRegExp:=@RegExp[1];
    end;

    RegExpC := pcre_compile( pRegExp,
                             AOptions and PCRE_COMPILE_ALLOWED_OPTIONS,
                             ErrorMsg, ErrorPos, ALocale);

    { The copy came from GetMem, so it is released with FreeMem and the same
      size (StrDispose is only valid for StrNew allocations). }
    if HeapCopy then
      FreeMem(pRegExp,PatSize);

    if RegExpC = nil then
      exit;

    ErrorMsg:=nil;
    RegExpExt := pcre_study( RegExpC, 0, ErrorMsg );
    if (RegExpExt = nil) and (ErrorMsg <> nil) then
    begin
      pcre_free(RegExpC);
      RegExpC:=nil;        { otherwise Done would free it a second time }
      exit;
    end;

    GetMem(Matches,SizeOf(TMatchArray));
    Error:=false;
  end;
  { Releases the compiled pattern, the study data and the offset vector. }
  destructor TpcRegExp.Done;
  begin
    { pcre_free ignores nil pointers itself }
    pcre_free(RegExpC);
    pcre_free(RegExpExt);
    if Matches = nil then
      exit;
    FreeMem(Matches,SizeOf(TMatchArray));
  end;
  { Runs the pattern against AStr (ALen bytes). If the previous search
    matched, scanning starts at the end offset of that match, otherwise at
    offset 0. Updates MatchesCount and PartialMatch; returns true on a match. }
  function TpcRegExp.SearchNext( AStr: Pchar; ALen : longint ) : boolean;
  var
    Options  : Integer;
    StartOfs : Integer;
  begin // must handle PCRE_ERROR_PARTIAL here
    Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and
               PCRE_EXEC_ALLOWED_OPTIONS;

    if MatchesCount > 0 then
      StartOfs := PMatchArray(Matches)^[1]   { end of the previous full match }
    else
      StartOfs := 0;

    MatchesCount := pcre_exec( RegExpC, RegExpExt, AStr, ALen, StartOfs,
                               Options, Matches, MAX_MATCHES );

    { pcre_exec returns 0 when the match succeeded but the offset vector was
      too small for all groups; only the first MAX_MATCHES div 3 pairs are
      valid then. Report that as a match rather than as a failure. }
    if MatchesCount = 0 then
      MatchesCount := MAX_MATCHES div 3;

    PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL;
    SearchNext := MatchesCount > 0;
  end;
  { Starts a fresh search at the beginning of AStr and stores ALen in
    SourceLen. Returns what SearchNext returns. }
  function TpcRegExp.Search( AStr: Pchar; ALen : longint):boolean;
  var
    Found : boolean;
  begin
    MatchesCount := 0;               { forget the previous match }
    Found := SearchNext(AStr, ALen);
    SourceLen := ALen;
    Search := Found;
  end;
  { Searches the complete subject AStr (ALen bytes), starting at offset AOfs.
    The offsets PCRE stores are relative to the start of AStr, not to AOfs.
    Updates MatchesCount, PartialMatch and SourceLen; returns true on a match. }
  function TpcRegExp.SearchOfs( AStr: Pchar; ALen, AOfs: longint ) : boolean;
  var Options: Integer;
  begin
    MatchesCount:=0;
    Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and
               PCRE_EXEC_ALLOWED_OPTIONS;
    MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, AOfs,
                             Options, Matches, MAX_MATCHES );

    { A result of 0 means "matched, but the offset vector was too small";
      the first MAX_MATCHES div 3 pairs are valid. }
    if MatchesCount = 0 then
      MatchesCount := MAX_MATCHES div 3;

    PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL;
    SearchOfs := MatchesCount > 0;

    { Match offsets are absolute within AStr, so the length that
      GetPostSubStr measures against must be the full subject length. }
    SourceLen := ALen;
  end;
  { Returns position and length of captured group ANom of the last match
    (group 0 is the whole match). Pos is the zero-based offset stored by
    pcre_exec. Returns false, leaving Pos/Len untouched, when there is no
    match or ANom is outside 0..MatchesCount-1. }
  function TpcRegExp.MatchSub(ANom:integer; var Pos,Len:longint):boolean;
  var
    Idx : integer;
  begin
    { the lower bound check keeps a negative ANom from indexing in front of
      the offset vector }
    if (MatchesCount > 0) and (ANom >= 0) and (ANom <= (MatchesCount-1)) then
    begin
      Idx:=ANom*2;                         { each group owns a start/end pair }
      Pos:=PMatchArray(Matches)^[Idx];
      Len:=PMatchArray(Matches)^[Idx+1]-Pos;
      MatchSub:=true;
    end
    else
      MatchSub:=false;
  end;
  { Position and length of the whole match, i.e. group 0 of MatchSub. }
  function TpcRegExp.MatchFull(var Pos,Len:longint):boolean;
  begin
    Result := MatchSub(0, Pos, Len);
  end;
  { Returns the text of captured group ANom taken from AStr, which must be
    the subject of the last search. Yields '' when the group does not exist
    or has no length. }
  function TpcRegExp.GetSubStr(ANom: integer; AStr: Pchar):string;
  var
    s: ansistring;
    pos,len: longint;
  begin
    s:='';
    { len > 0 also filters groups whose stored offsets give no text, so s[1]
      is never taken of an empty string and AStr is not indexed for them }
    if MatchSub(ANom, pos, len) and (len > 0) then
    begin
      setlength(s, len);
      Move(AStr[pos], s[1], len);
    end;
    GetSubStr:=s;
  end;
  { Returns the part of AStr that precedes the last match, or '' when there
    is no match or the match starts at the beginning of AStr. }
  function TpcRegExp.GetPreSubStr(AStr: Pchar):string;
  var
    s: ansistring;
    l: longint;
  begin
    s:='';
    if (MatchesCount > 0) then
    begin
      { element 0 is the zero-based start offset of the match, which is
        exactly the number of characters in front of it }
      l:=PMatchArray(Matches)^[0];
      if l > 0 then
      begin
        setlength(s,l);
        Move(AStr[0],s[1],l);
      end;
    end;
    GetPreSubStr:=s;
  end;
  { Returns the part of AStr that follows the last match, measured against
    SourceLen, or '' when there is no match or nothing follows it. }
  function TpcRegExp.GetPostSubStr(AStr: Pchar):string;
  var
    s: ansistring;
    l: longint;
    EndPos: longint;
  begin
    s:='';
    if (MatchesCount > 0) then
    begin
      { element 1 is the offset just past the whole match; the end of the
        last captured group is not necessarily the end of the match }
      EndPos:=PMatchArray(Matches)^[1];
      { offsets are zero-based, so exactly SourceLen-EndPos characters remain }
      l:=SourceLen-EndPos;
      if l > 0 then
      begin
        setlength(s,l);
        Move(AStr[EndPos],s[1],l);
      end;
    end;
    GetPostSubStr:=s;
  end;
  { Returns the text of the whole match (group 0) taken from AStr. }
  function TpcRegExp.GetFullStr(AStr: Pchar):string;
  begin
    GetFullStr:=GetSubStr(0,AStr);
  end;
  { Expands the replacement template ARepl using the last match on AStr.
    Recognised sequences:
      \0        whole match          \1..\9   captured group
      \x        the character x itself (escape)
      $&        whole match          $1..$9   captured group
      $`        text before match    $'       text after match
    Any other "$x" pair and a "\" or "$" standing at the very end of ARepl
    are copied unchanged. }
  function TpcRegExp.GetReplStr(AStr: Pchar; const ARepl: string):string;
  var
    s: ansistring;
    l,i,lasti: longint;
    c,nxt: char;
  begin
    l:=length(ARepl);
    i:=1;
    lasti:=1;      { start of the literal text not yet copied to s }
    s:='';
    while i <= l do
    begin
      c:=ARepl[i];
      { An introducer is only special when a character follows it. A trailing
        one stays part of the pending literal run, so neither it nor the text
        in front of it is lost. }
      if ((c = '\') or (c = '$')) and (i < l) then
      begin
        s:=s+copy(ARepl,lasti,i-lasti);
        nxt:=ARepl[i+1];
        if c = '\' then
        begin
          {AH 17-10-05 support for POSIX \1-\9 backreferences}
          case nxt of
            '0' : s:=s+GetFullStr(AStr);
            '1'..'9' : s:=s+GetSubStr(ord(nxt)-ord('0'),AStr);
            else s:=s+nxt; // copy the escaped character
          end;
        end
        else
        begin
          case nxt of
            '&' : s:=s+GetFullStr(AStr);
            '1'..'9' : s:=s+GetSubStr(ord(nxt)-ord('0'),AStr);
            '`' : s:=s+GetPreSubStr(AStr);
            #39 : s:=s+GetPostSubStr(AStr);
            else s:=s+c+nxt; // not a reference: keep both characters
          end;
        end;
        inc(i);          { skip the character consumed by the sequence }
        lasti:=i+1;
      end;
      inc(i);
    end;
    if lasti <= {AH 25-10-2004 added =, else l==1 won't work} l then
      s:=s+copy(ARepl,lasti,l-lasti+1);
    GetReplStr:=s;
  end;
  { Returns the stored error message as a Pascal string, or '' when no
    message is stored (ErrorMsg is nil). }
  function TpcRegExp.ErrorStr:string;
  begin
    if ErrorMsg = nil then
      ErrorStr:=''
    else
      ErrorStr:=StrPas(ErrorMsg);
  end;
  618. {TRegExpCollection}
  { Creates the collection (initial limit 1, delta 1) and stores AMaxRegExp
    as the item count at which CheckNew starts evicting. }
  constructor TRegExpCollection.Init(AMaxRegExp: integer);
  begin
    inherited Init(1, 1);
    CompareModeInsert := true;    { default mode: item against item }
    MaxRegExp := AMaxRegExp;
  end;
  { Disposes one stored TpcRegExp; nil items are ignored. }
  procedure TRegExpCollection.FreeItem(P: Pointer);
  begin
    if P = nil then
      exit;
    Dispose(PpcRegExp(P), Done);
  end;
  { Ordering function of the collection, based on stringsx.PasStrCmp.
    While CompareModeInsert is set, the patterns of the two items P1 and P2
    are compared. Otherwise P2 is not used and the pattern of P1 is compared
    with SearchRegExp. }
  function TRegExpCollection.Compare(P1, P2: Pointer): Integer;
  begin
    if not CompareModeInsert then
      Compare := stringsx.PasStrCmp(PpcRegExp(P1)^.RegExp, SearchRegExp, False)
    else
      Compare := stringsx.PasStrCmp(PpcRegExp(P1)^.RegExp, PpcRegExp(P2)^.RegExp, False);
  end;
  { Looks up the cached expression for ARegExp. On success P receives the
    item and the result is true; otherwise P is nil and the result is false.
    Compare is switched to the SearchRegExp mode for the duration of the
    lookup and switched back afterwards. }
  function TRegExpCollection.Find(ARegExp:shortstring;var P: PpcRegExp):boolean;
  var
    Idx   : integer;
    Found : boolean;
  begin
    SearchRegExp := ARegExp;
    CompareModeInsert := false;
    Found := Search(nil, Idx);    { the key is taken from SearchRegExp }
    if Found then
      P := PpcRegExp(At(Idx))
    else
      P := nil;
    Find := Found;
    CompareModeInsert := true;
  end;
  { Returns the cached expression for ARegExp. A missing one is compiled with
    PCRE_CASELESS and inserted; when the collection already holds MaxRegExp
    items, the item at index 0 is freed first. }
  function TRegExpCollection.CheckNew(ARegExp:shortstring):PpcRegExp;
  var
    Entry : PpcRegExp;
  begin
    if Find(ARegExp, Entry) then
    begin
      CheckNew := Entry;
      exit;
    end;
    if Count = MaxRegExp then
      AtFree(0);
    New(Entry, Init(ARegExp, PCRE_CASELESS, nil));
    Insert(Entry);
    CheckNew := Entry;
  end;
  { Compiles WildCard with AOptions/ALocale, searches aStr once and returns
    whether it matched. The temporary expression is disposed before return. }
  function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean;
  var
    Rx      : PpcRegExp;
    Matched : Boolean;
  begin
    New(Rx, Init(WildCard, AOptions, ALocale));
    Matched := Rx^.Search(pchar(AStr), Length(AStr));
    Dispose(Rx, Done);
    pcGrepMatch := Matched;
  end;
  { Compiles WildCard, searches aStr once and returns the template aRepl
    expanded for that match (see TpcRegExp.GetReplStr), or '' when nothing
    matched. The temporary expression is disposed before return. }
  function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string;
  var
    Rx       : PpcRegExp;
    Expanded : string;
  begin
    New(Rx, Init(WildCard, AOptions, ALocale));
    Expanded := '';
    if Rx^.Search(pchar(AStr), Length(AStr)) then
      Expanded := Rx^.GetReplStr(pchar(AStr), ARepl);
    Dispose(Rx, Done);
    pcGrepSub := Expanded;
  end;
  { Like pcGrepMatch, but the expression is obtained from PRegExpCache
    (CheckNew) and therefore stays owned by the cache. }
  function pcFastGrepMatch(WildCard, aStr: string): Boolean;
  var
    Cached : PpcRegExp;
  begin
    Cached := PRegExpCache^.CheckNew(WildCard);
    Result := Cached^.Search(pchar(AStr), Length(AStr));
  end;
  { Like pcGrepSub, but the expression is obtained from PRegExpCache
    (CheckNew). Returns '' when aStr does not match. }
  function pcFastGrepSub(WildCard, aStr, aRepl: string): string;
  var
    Cached : PpcRegExp;
  begin
    Cached := PRegExpCache^.CheckNew(WildCard);
    if not Cached^.Search(pchar(AStr), Length(AStr)) then
    begin
      pcFastGrepSub := '';
      exit;
    end;
    pcFastGrepSub := Cached^.GetReplStr(pchar(AStr), ARepl);
  end;
  {$IFDEF PCRE_5_0}
  { Returns whatever the imported pcre_version function returns. }
  function pcGetVersion : pchar;
  begin
    pcGetVersion := pcre_version;
  end;
  {$ENDIF PCRE_5_0}
  { Checks the expression referenced by pRegExp (a PpcRegExp).
    Returns true when the pointer is nil or the expression is flagged with
    Error. In the latter case a message box showing the error text and
    position is displayed, the object is disposed and pRegExp is set to nil. }
  function pcError;
  var P: ppcRegExp absolute pRegExp;
  begin
    if P = nil then
    begin
      Result := true;
      exit;
    end;
    Result := P^.Error;
    if not Result then
      exit;
    { the error offset is always shown, even when it is 0 }
    MessageBox(GetString(erRegExpCompile)+'"'+P^.ErrorStr+'"'+GetString(erRegExpCompPos),
               @P^.ErrorPos,mfConfirmation+mfOkButton);
    Dispose(P, Done);
    P:=nil;
  end;
  { Creates a TpcRegExp for Pattern using the configured default options and
    DefaultLocaleTable; PCRE_CASELESS is added unless CaseSens is set.
    The result is a PpcRegExp returned as an untyped pointer. }
  function pcInit;
  var Flags : Integer;
  begin
    {DefaultOptions}
    Flags := startup.MiscMultiData.cfgRegEx.DefaultOptions;
    if not CaseSens then
      Flags := Flags or PCRE_CASELESS;
    Result := New( PpcRegExp, Init( Pattern, Flags, DefaultLocaleTable ) );
  end;
  Initialization
    { create the expression cache used by pcFastGrepMatch/pcFastGrepSub,
      with a limit of 64 items }
    New(PRegExpCache, Init(64));
  Finalization
    { disposing the cache frees every cached expression via FreeItem }
    Dispose(PRegExpCache, Done);
  End.