// Tokanizer.js (17 KB)
  1. /*------------------------------------------------------------------------------
  2. * NAME : Tokanizer.js
  3. * PURPOSE : Parse a string and make an array of tokens. The following tokens are
  4. * recognized.
  5. * ()
  6. * ^ * / % + -
  7. * ! & | TRUE FALSE
  8. * < <= > >= <> =
  9. * AVG ABS ACOS ASC ASIN ATAN CDATE CHR COS DATE FIX HEX IIF
  10. * LCASE LEFT LOG MAX MID MIN RIGHT ROUND SIN SQRT TAN UCASE
  11. * , ' "
  12. * AUTHOR : Prasad P. Khandekar
  13. * CREATED : August 19, 2005
  14. *------------------------------------------------------------------------------
  15. * -3 // Negative 3 - is the first token
  16. * 3+-2 // Negative 2 - previous token is an operator and next is a digit
  17. * 3*-(2) // Negative 2 - previous token is an operator and next is an opening brace
  18. * 3*ABS(-2) // Negative 2 - previous token is an opening brace and next is a digit
  19. * 3+-SQR(4) // Negative SQR - previous token is an operator and next is a alpha
  20. *
  21. * 3-2 // Positive 2 - previous token is a digit and next is a digit
  22. * 3 - 2 // Positive 2 - previous token is a digit or space and next is a space
  23. * ABS(3.4)-2 // Positive 2 - previous token is a closing brace and next is a digit
  24. * ABS(3.4)- 2 // Positive 2 - previous token is a digit and next is a space
  25. * ABS(3.4) - 2 // Positive 2 - previous token is a closing brace or space and next is a space
  26. *------------------------------------------------------------------------------
  27. * Copyright (c) 2005. Khan Information Systems. All Rights Reserved
  28. * The contents of this file are subject to the KIS Public License 1.0
  29. * (the "License"); you may not use this file except in compliance with the
  30. * License. You should have received a copy of the KIS Public License along with
  31. * this library; if not, please ask your software vendor to provide one.
  32. *
  33. * YOU AGREE THAT THE PROGRAM IS PROVIDED AS-IS, WITHOUT WARRANTY OF ANY KIND
  34. * (EITHER EXPRESS OR IMPLIED) INCLUDING, WITHOUT LIMITATION, ANY IMPLIED
  35. * WARRANTY OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, AND ANY
  36. * WARRANTY OF NON INFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR
  37. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  38. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  39. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  40. * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  41. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THE
  42. * PROGRAM, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. *
  44. * See the License for the specific language governing rights and limitations
  45. * under the License.
  46. *-----------------------------------------------------------------------------*/
  // Lookup lists shared by the tokenizer helpers. The first five are
  // comma-separated strings; lstFuncOps is an array of upper-case names.
  // Letters a-z, one per entry.
  var lstAlpha = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z";
  // Decimal digits, one per entry.
  var lstDigits = "0,1,2,3,4,5,6,7,8,9";
  // Arithmetic operator symbols (consulted by IsOperator).
  var lstArithOps = "^,*,/,%,+,-";
  // Logical operator symbols (not referenced by the code visible in this file).
  var lstLogicOps = "!,&,|";
  // Comparison operator symbols (consulted by IsOperator).
  var lstCompaOps = "<,<=,>,>=,<>,=";
  // Built-in function names (consulted by IsFunction).
  var lstFuncOps = ["AVG","ABS","ACOS","ASC","ASIN","ATAN","CDATE","CHR","COS","DATE","FIX","HEX","IIF","LCASE","LEFT","LOG","MAX","MID","MIN","RIGHT","ROUND","SIN","SQRT","TAN","UCASE"];
  /*------------------------------------------------------------------------------
  * NAME : Tokanize
  * PURPOSE : Breaks the string into a token array. It also checks whether the
  *   parentheses, single quotes and double quotes are balanced or not.
  * PARAMETERS : pstrExpression - The string from which the token array is to be
  *   constructed. It is passed through Trim before scanning.
  * RETURNS : An array of string tokens. Quoted constants are returned without
  *   their surrounding quotes; spaces only separate tokens.
  * THROWS : "Unterminated string constant" - a ' or " has no closing partner
  *   "Unbalanced parenthesis!" - a ")" has no matching "(", or a "(" is
  *   never closed
  *   Both are thrown as plain strings (not Error objects).
  *----------------------------------------------------------------------------*/
  function Tokanize(pstrExpression)
  {
      var arrTokens = [];
      var strToken = "";      // operand text collected since the last delimiter
      var intBraces = 0;      // number of "(" still waiting for their ")"
      var intCntr, intPos;
      var chrChar, chrNext, prevToken;

      // Emits the pending operand text (if any) as a token.
      function flushPending()
      {
          if (strToken.length > 0)
          {
              arrTokens.push(strToken);
              strToken = "";
          }
      }

      pstrExpression = Trim(pstrExpression);

      for (intCntr = 0; intCntr < pstrExpression.length; intCntr++)
      {
          chrChar = pstrExpression.charAt(intCntr);
          // charAt yields "" past the end of the string, so chrNext is always a string.
          chrNext = pstrExpression.charAt(intCntr + 1);

          // Progress display. The typeof test keeps this from throwing where no
          // global "window" exists; the status text is only replaced when it is
          // already non-empty.
          if (typeof window != "undefined" && window && window.status)
              window.status = "Processing " + chrChar;

          switch (chrChar)
          {
              case " " :
                  flushPending();
                  break;

              case "(" :
                  intBraces++;
                  flushPending();
                  arrTokens.push(chrChar);
                  break;

              case ")" :
                  intBraces--;
                  // A ")" with nothing left to close can never be balanced later.
                  if (intBraces < 0)
                      throw "Unbalanced parenthesis!";
                  flushPending();
                  arrTokens.push(chrChar);
                  break;

              // Single-character tokens that always stand on their own.
              case "^" :
              case "*" :
              case "/" :
              case "%" :
              case "&" :
              case "|" :
              case "," :
              case "=" :
                  flushPending();
                  arrTokens.push(chrChar);
                  break;

              case "-" :
              case "+" :
                  flushPending();
                  prevToken = (arrTokens.length > 0) ? arrTokens[arrTokens.length - 1] : "";
                  if (intCntr == 0 || ((IsOperator(prevToken) ||
                      prevToken == "(" || prevToken == ",") &&
                      (IsDigit(chrNext) || chrNext == "(")))
                  {
                      // Sign of the operand that follows: keep it pending so the
                      // next characters are appended to it (e.g. "-2"). When "("
                      // follows, the "(" case flushes the sign as its own token.
                      strToken += chrChar;
                  }
                  else
                  {
                      // Binary plus/minus.
                      arrTokens.push(chrChar);
                  }
                  break;

              case "<" :
                  flushPending();
                  if (chrNext == "=" || chrNext == ">")
                  {
                      // Two-character operator "<=" or "<>": consume both characters.
                      arrTokens.push(chrChar + chrNext);
                      intCntr++;
                  }
                  else
                  {
                      arrTokens.push(chrChar);
                  }
                  break;

              case ">" :
                  flushPending();
                  if (chrNext == "=")
                  {
                      arrTokens.push(chrChar + chrNext);
                      intCntr++;
                  }
                  else
                  {
                      arrTokens.push(chrChar);
                  }
                  break;

              case "'" :
              case "\"" :
                  flushPending();
                  // The constant runs up to the next occurrence of the same quote.
                  intPos = pstrExpression.indexOf(chrChar, intCntr + 1);
                  if (intPos < 0)
                      throw "Unterminated string constant";
                  // The text between the quotes becomes one token (possibly empty).
                  arrTokens.push(pstrExpression.substring(intCntr + 1, intPos));
                  // Resume scanning after the closing quote.
                  intCntr = intPos;
                  break;

              default :
                  strToken += chrChar;
                  break;
          }
      }

      if (intBraces != 0)
          throw "Unbalanced parenthesis!";

      flushPending();
      return arrTokens;
  }
  /*------------------------------------------------------------------------------
  * NAME : IsDigit
  * PURPOSE : Checks whether the character specified by chrArg is a numeric
  *   character (0-9).
  * PARAMETERS : chrArg - The character to be checked. Non-string values are
  *   converted with String() first.
  * RETURNS : True - If chrArg is exactly one character in the range 0-9
  *   False - Otherwise, including the empty string, "," and any
  *   multi-character text
  *----------------------------------------------------------------------------*/
  function IsDigit(chrArg)
  {
      var strChar = String(chrArg);
      // Exactly one character is required; a substring search on a
      // comma-separated list would also accept "" and ",".
      if (strChar.length != 1)
          return false;
      return (strChar >= "0" && strChar <= "9");
  }
  /*------------------------------------------------------------------------------
  * NAME : IsAlpha
  * PURPOSE : Checks whether the character specified by chrArg is a letter
  *   (a-z or A-Z).
  * PARAMETERS : chrArg - The character to be checked. Non-string values are
  *   converted with String() first.
  * RETURNS : True - If chrArg is exactly one character in a-z or A-Z
  *   False - Otherwise, including the empty string, "," and any
  *   multi-character text
  *----------------------------------------------------------------------------*/
  function IsAlpha(chrArg)
  {
      var strChar = String(chrArg);
      // Exactly one character is required; a substring search on a
      // comma-separated list would also accept "", "," and "a,b".
      if (strChar.length != 1)
          return false;
      return (strChar >= "a" && strChar <= "z") ||
             (strChar >= "A" && strChar <= "Z");
  }
  /*------------------------------------------------------------------------------
  * NAME : IsOperator
  * PURPOSE : Checks whether the string specified by strArg is one of the
  *   arithmetic (lstArithOps) or comparison (lstCompaOps) operators.
  * PARAMETERS : strArg - The string to be checked. Non-string values are
  *   converted with String() first.
  * RETURNS : True - If strArg is exactly equal to one entry of lstArithOps
  *   or lstCompaOps
  *   False - Otherwise, including the empty string and ","
  * NOTES : The logical operators in lstLogicOps are not consulted.
  *----------------------------------------------------------------------------*/
  function IsOperator(strArg)
  {
      var strOp = String(strArg);
      // Compare against whole list entries; a substring search on the
      // comma-separated lists would also accept "", "," and "*,/".
      var arrOps = (lstArithOps + "," + lstCompaOps).split(",");
      var idx;
      for (idx = 0; idx < arrOps.length; idx++)
      {
          if (arrOps[idx] === strOp)
              return true;
      }
      return false;
  }
  /*------------------------------------------------------------------------------
  * NAME : IsFunction
  * PURPOSE : Tells whether strArg names one of the built-in functions listed
  *   in lstFuncOps. The comparison ignores letter case.
  * PARAMETERS : strArg - The candidate function name
  * RETURNS : True - If the upper-cased strArg equals an entry of lstFuncOps
  *   False - Otherwise
  *----------------------------------------------------------------------------*/
  function IsFunction(strArg)
  {
      // lstFuncOps holds upper-case names, so normalise the candidate once.
      var strName = strArg.toUpperCase();
      var intLeft = lstFuncOps.length;
      while (intLeft-- > 0)
      {
          if (lstFuncOps[intLeft] == strName)
              return true;
      }
      return false;
  }
  /*------------------------------------------------------------------------------
  * NAME : Trim
  * PURPOSE : Strips the spaces found at both ends of a string by applying
  *   RTrim and then LTrim.
  * PARAMETERS : pstrVal - The string to be stripped
  * RETURNS : The stripped string; "" when pstrVal is empty.
  *----------------------------------------------------------------------------*/
  function Trim(pstrVal)
  {
      var strResult = "";
      // An empty input needs no work; anything else goes through both helpers.
      if (!(pstrVal.length < 1))
      {
          strResult = LTrim(RTrim(pstrVal));
      }
      return strResult;
  }
  /*------------------------------------------------------------------------------
  * NAME : RTrim
  * PURPOSE : Removes trailing spaces from a string.
  * PARAMETERS : pstrValue - The string from which trailing spaces are to be removed.
  * RETURNS : A string with trailing spaces removed; "" when pstrValue is empty
  *   or consists only of spaces.
  * NOTES : Only the space character (code 32) is removed; tabs and line
  *   breaks are kept.
  *----------------------------------------------------------------------------*/
  function RTrim(pstrValue)
  {
      var w_space = String.fromCharCode(32);
      var intEnd = pstrValue.length;    // index just past the last kept character
      if (intEnd < 1)
      {
          return "";
      }
      // Walk backwards over the trailing spaces.
      while (intEnd > 0 && pstrValue.charAt(intEnd - 1) == w_space)
      {
          intEnd--;
      }
      return pstrValue.substring(0, intEnd);
  }
  /*------------------------------------------------------------------------------
  * NAME : LTrim
  * PURPOSE : Removes leading spaces from a string.
  * PARAMETERS : pstrValue - The string from which leading spaces are to be removed.
  * RETURNS : A string with leading spaces removed; "" when pstrValue is empty
  *   or consists only of spaces.
  * NOTES : Only the space character (code 32) is removed; tabs and line
  *   breaks are kept.
  *----------------------------------------------------------------------------*/
  function LTrim(pstrValue)
  {
      var w_space = String.fromCharCode(32);
      // The length must be read before it is tested.
      var v_length = pstrValue.length;
      var intStart = 0;                 // index of the first kept character
      if (v_length < 1)
      {
          return "";
      }
      // Walk forwards over the leading spaces.
      while (intStart < v_length && pstrValue.charAt(intStart) == w_space)
      {
          intStart++;
      }
      return pstrValue.substring(intStart, v_length);
  }