XS.php 65 KB


  1. <?php
  2. /**
  3. * Xunsearch PHP-SDK 引导文件
  4. *
  5. * 这个文件是由开发工具中的 'build lite' 指令智能合并类定义的源码文件
  6. * 并删除所有注释而自动生成的。
  7. *
  8. * 当您编写搜索项目时,先通过 require 引入该文件即可使用所有的 PHP-SDK
  9. * 功能。合并的主要目的是便于拷贝,只要复制这个库文件即可,而不用拷贝一
  10. * 大堆文件。详细文档请阅读 {@link:http://www.xunsearch.com/doc/php/}
  11. *
  12. * 切勿手动修改本文件!生成时间:2012/07/02 13:21:40
  13. *
  14. * @author hightman
  15. * @link http://www.xunsearch.com/
  16. * @copyright Copyright &copy; 2011 HangZhou YunSheng Network Technology Co., Ltd.
  17. * @license http://www.xunsearch.com/license/
  18. * @version $Id$
  19. */
  // ---------------------------------------------------------------------
  // Protocol constants. Names and values are fixed by the wire protocol;
  // the grouping below simply follows the constant name prefixes.
  // ---------------------------------------------------------------------

  // Generic commands (note: CMD_USE and CMD_HELLO share the value 1).
  define('CMD_NONE', 0);
  define('CMD_DEFAULT', CMD_NONE);
  define('CMD_PROTOCOL', 20110707);
  define('CMD_USE', 1);
  define('CMD_HELLO', 1);
  define('CMD_DEBUG', 2);
  define('CMD_TIMEOUT', 3);
  define('CMD_QUIT', 4);

  // CMD_INDEX_* / project maintenance commands (32..42).
  define('CMD_INDEX_SET_DB', 32);
  define('CMD_INDEX_GET_DB', 33);
  define('CMD_INDEX_SUBMIT', 34);
  define('CMD_INDEX_REMOVE', 35);
  define('CMD_INDEX_EXDATA', 36);
  define('CMD_INDEX_CLEAN_DB', 37);
  define('CMD_DELETE_PROJECT', 38);
  define('CMD_INDEX_COMMIT', 39);
  define('CMD_INDEX_REBUILD', 40);
  define('CMD_FLUSH_LOGGING', 41);
  define('CMD_INDEX_SYNONYMS', 42);

  // CMD_SEARCH_* commands (64..73); SET_DB / GET_DB alias the index codes.
  define('CMD_SEARCH_DB_TOTAL', 64);
  define('CMD_SEARCH_GET_TOTAL', 65);
  define('CMD_SEARCH_GET_RESULT', 66);
  define('CMD_SEARCH_SET_DB', CMD_INDEX_SET_DB);
  define('CMD_SEARCH_GET_DB', CMD_INDEX_GET_DB);
  define('CMD_SEARCH_ADD_DB', 68);
  define('CMD_SEARCH_FINISH', 69);
  define('CMD_SEARCH_DRAW_TPOOL', 70);
  define('CMD_SEARCH_ADD_LOG', 71);
  define('CMD_SEARCH_GET_SYNONYMS', 72);
  define('CMD_SEARCH_SCWS_GET', 73);

  // CMD_QUERY_GET_* commands (96..99).
  define('CMD_QUERY_GET_STRING', 96);
  define('CMD_QUERY_GET_TERMS', 97);
  define('CMD_QUERY_GET_CORRECTED', 98);
  define('CMD_QUERY_GET_EXPANDED', 99);

  // Response status codes.
  define('CMD_OK', 128);
  define('CMD_ERR', 129);

  // CMD_SEARCH_RESULT_* codes (140..142).
  define('CMD_SEARCH_RESULT_DOC', 140);
  define('CMD_SEARCH_RESULT_FIELD', 141);
  define('CMD_SEARCH_RESULT_FACETS', 142);

  // CMD_DOC_* codes and the index request command (160..163).
  define('CMD_DOC_TERM', 160);
  define('CMD_DOC_VALUE', 161);
  define('CMD_DOC_INDEX', 162);
  define('CMD_INDEX_REQUEST', 163);

  // Import header and CMD_SEARCH_SET_* style commands (191..198).
  define('CMD_IMPORT_HEADER', 191);
  define('CMD_SEARCH_SET_SORT', 192);
  define('CMD_SEARCH_SET_CUT', 193);
  define('CMD_SEARCH_SET_NUMERIC', 194);
  define('CMD_SEARCH_SET_COLLAPSE', 195);
  define('CMD_SEARCH_KEEPALIVE', 196);
  define('CMD_SEARCH_SET_FACETS', 197);
  define('CMD_SEARCH_SCWS_SET', 198);

  // CMD_QUERY_* construction commands (224..231).
  define('CMD_QUERY_INIT', 224);
  define('CMD_QUERY_PARSE', 225);
  define('CMD_QUERY_TERM', 226);
  define('CMD_QUERY_RANGEPROC', 227);
  define('CMD_QUERY_RANGE', 228);
  define('CMD_QUERY_VALCMP', 229);
  define('CMD_QUERY_PREFIX', 230);
  define('CMD_QUERY_PARSEFLAG', 231);

  // Sort types, their mask, and the ascending flag bit.
  define('CMD_SORT_TYPE_RELEVANCE', 0);
  define('CMD_SORT_TYPE_DOCID', 1);
  define('CMD_SORT_TYPE_VALUE', 2);
  define('CMD_SORT_TYPE_MULTI', 3);
  define('CMD_SORT_TYPE_MASK', 0x3f);
  define('CMD_SORT_FLAG_ASCENDING', 0x80);

  // Query operators.
  define('CMD_QUERY_OP_AND', 0);
  define('CMD_QUERY_OP_OR', 1);
  define('CMD_QUERY_OP_AND_NOT', 2);
  define('CMD_QUERY_OP_XOR', 3);
  define('CMD_QUERY_OP_AND_MAYBE', 4);
  define('CMD_QUERY_OP_FILTER', 5);

  // Range processor kinds.
  define('CMD_RANGE_PROC_STRING', 0);
  define('CMD_RANGE_PROC_DATE', 1);
  define('CMD_RANGE_PROC_NUMBER', 2);

  // Value comparison kinds.
  define('CMD_VALCMP_LE', 0);
  define('CMD_VALCMP_GE', 1);

  // Query parser flag bits (1536 = 1024 | 512, so it includes the AUTO_SYNONYMS bit).
  define('CMD_PARSE_FLAG_BOOLEAN', 1);
  define('CMD_PARSE_FLAG_PHRASE', 2);
  define('CMD_PARSE_FLAG_LOVEHATE', 4);
  define('CMD_PARSE_FLAG_BOOLEAN_ANY_CASE', 8);
  define('CMD_PARSE_FLAG_WILDCARD', 16);
  define('CMD_PARSE_FLAG_PURE_NOT', 32);
  define('CMD_PARSE_FLAG_PARTIAL', 64);
  define('CMD_PARSE_FLAG_SPELLING_CORRECTION', 128);
  define('CMD_PARSE_FLAG_SYNONYM', 256);
  define('CMD_PARSE_FLAG_AUTO_SYNONYMS', 512);
  define('CMD_PARSE_FLAG_AUTO_MULTIWORD_SYNONYMS', 1536);

  // Prefix kinds.
  define('CMD_PREFIX_NORMAL', 0);
  define('CMD_PREFIX_BOOLEAN', 1);

  // Index/value flag bits (SAVEVALUE, CHECKSTEM and VALUE_FLAG_NUMERIC all equal 0x80).
  define('CMD_INDEX_WEIGHT_MASK', 0x3f);
  define('CMD_INDEX_FLAG_WITHPOS', 0x40);
  define('CMD_INDEX_FLAG_SAVEVALUE', 0x80);
  define('CMD_INDEX_FLAG_CHECKSTEM', 0x80);
  define('CMD_VALUE_FLAG_NUMERIC', 0x80);

  // Sub-types of CMD_INDEX_REQUEST and CMD_INDEX_SYNONYMS.
  define('CMD_INDEX_REQUEST_ADD', 0);
  define('CMD_INDEX_REQUEST_UPDATE', 1);
  define('CMD_INDEX_SYNONYMS_ADD', 0);
  define('CMD_INDEX_SYNONYMS_DEL', 1);

  // CMD_SCWS_* getter and setter sub-commands.
  define('CMD_SCWS_GET_VERSION', 1);
  define('CMD_SCWS_GET_RESULT', 2);
  define('CMD_SCWS_GET_TOPS', 3);
  define('CMD_SCWS_HAS_WORD', 4);
  define('CMD_SCWS_SET_IGNORE', 50);
  define('CMD_SCWS_SET_MULTI', 51);
  define('CMD_SCWS_SET_DUALITY', 52);

  // CMD_ERR_* error codes.
  define('CMD_ERR_UNKNOWN', 600);
  define('CMD_ERR_NOPROJECT', 401);
  define('CMD_ERR_TOOLONG', 402);
  define('CMD_ERR_INVALIDCHAR', 403);
  define('CMD_ERR_EMPTY', 404);
  define('CMD_ERR_NOACTION', 405);
  define('CMD_ERR_RUNNING', 406);
  define('CMD_ERR_REBUILDING', 407);
  define('CMD_ERR_WRONGPLACE', 450);
  define('CMD_ERR_WRONGFORMAT', 451);
  define('CMD_ERR_EMPTYQUERY', 452);
  define('CMD_ERR_TIMEOUT', 501);
  define('CMD_ERR_IOERR', 502);
  define('CMD_ERR_NOMEM', 503);
  define('CMD_ERR_BUSY', 504);
  define('CMD_ERR_UNIMP', 505);
  define('CMD_ERR_NODB', 506);
  define('CMD_ERR_DBLOCKED', 507);
  define('CMD_ERR_CREATE_HOME', 508);
  define('CMD_ERR_INVALID_HOME', 509);
  define('CMD_ERR_REMOVE_HOME', 510);
  define('CMD_ERR_REMOVE_DB', 511);
  define('CMD_ERR_STAT', 512);
  define('CMD_ERR_OPEN_FILE', 513);
  define('CMD_ERR_TASK_CANCELED', 514);
  define('CMD_ERR_XAPIAN', 515);

  // CMD_OK_* success codes (RESULT_BEGIN aliases SEARCH_TOTAL).
  define('CMD_OK_INFO', 200);
  define('CMD_OK_PROJECT', 201);
  define('CMD_OK_QUERY_STRING', 202);
  define('CMD_OK_DB_TOTAL', 203);
  define('CMD_OK_QUERY_TERMS', 204);
  define('CMD_OK_QUERY_CORRECTED', 205);
  define('CMD_OK_SEARCH_TOTAL', 206);
  define('CMD_OK_RESULT_BEGIN', CMD_OK_SEARCH_TOTAL);
  define('CMD_OK_RESULT_END', 207);
  define('CMD_OK_TIMEOUT_SET', 208);
  define('CMD_OK_FINISHED', 209);
  define('CMD_OK_LOGGED', 210);
  define('CMD_OK_RQST_FINISHED', 250);
  define('CMD_OK_DB_CHANGED', 251);
  define('CMD_OK_DB_INFO', 252);
  define('CMD_OK_DB_CLEAN', 253);
  define('CMD_OK_PROJECT_ADD', 254);
  define('CMD_OK_PROJECT_DEL', 255);
  define('CMD_OK_DB_COMMITED', 256);
  define('CMD_OK_DB_REBUILD', 257);
  define('CMD_OK_LOG_FLUSHED', 258);
  define('CMD_OK_RESULT_SYNONYMS', 280);
  define('CMD_OK_SCWS_RESULT', 290);
  define('CMD_OK_SCWS_TOPS', 291);

  // Package metadata.
  define('PACKAGE_BUGREPORT', "http://www.xunsearch.com/bugs");
  define('PACKAGE_NAME', "xunsearch");
  define('PACKAGE_TARNAME', "xunsearch");
  define('PACKAGE_URL', "");
  define('PACKAGE_VERSION', "1.3.2");

  // Directory containing this file; used to locate sibling class files and ../app/*.ini.
  define('XS_LIB_ROOT', dirname(__FILE__));
  /**
   * Base exception of the SDK.
   *
   * Its string form shows the throwing file relative to the current working
   * directory instead of as an absolute path.
   */
  class XSException extends Exception
  {
      /**
       * Renders "[XSException] <relative file>(<line>): <message>", followed by
       * "(S#<code>)" when the exception code is greater than zero.
       *
       * @return string
       */
      public function __toString()
      {
          $string = '[' . __CLASS__ . '] ' . $this->getRelPath($this->getFile()) . '(' . $this->getLine() . '): ';
          $string .= $this->getMessage() . ($this->getCode() > 0 ? '(S#' . $this->getCode() . ')' : '');
          return $string;
      }

      /**
       * Expresses a path relative to the current working directory.
       *
       * Examples with a working directory of /a/b:
       *   /a/b/x.php   => x.php
       *   /a/b/c/x.php => c/x.php
       *   /a/x.php     => ../x.php
       *   /a/b         => .
       *
       * @param string $file path of a file or directory
       * @return string the relative path; the input is returned unchanged when
       *                it cannot be resolved by realpath() or when getcwd() fails
       */
      public static function getRelPath($file)
      {
          $from = getcwd();
          $real = realpath($file);
          // realpath() yields false for names that are not real paths (for
          // example the pseudo file name reported for eval()'d code); walking
          // the directory tree with such a value would only produce "../" noise.
          if ($real === false || $from === false)
              return strval($file);
          $file = $real;

          if (is_dir($file))
          {
              $pos = false;
              $to = $file;
          }
          else
          {
              $pos = strrpos($file, '/');
              // No '/' in the resolved path (e.g. a backslash-separated path):
              // it cannot be split here, so report it as is.
              if ($pos === false)
                  return $file;
              $to = substr($file, 0, $pos);
          }

          // Climb from the working directory towards the root, adding one
          // "../" per level, until $to is the current level or lies below it.
          for ($rel = '';; $rel .= '../')
          {
              if ($from === $to)
                  break;
              if ($from === dirname($from))
              {
                  // Reached the top-most directory: append $to minus its leading '/'.
                  $rel .= substr($to, 1);
                  break;
              }
              if (!strncmp($from . '/', $to, strlen($from) + 1))
              {
                  $rel .= substr($to, strlen($from) + 1);
                  break;
              }
              $from = dirname($from);
          }
          if (substr($rel, -1, 1) === '/')
              $rel = substr($rel, 0, -1);

          if ($pos === false)
              return $rel === '' ? '.' : $rel;
          // An empty $rel means the file sits directly in the working directory;
          // drop the separator so the result does not look like an absolute path.
          return $rel === '' ? substr($file, $pos + 1) : $rel . substr($file, $pos);
      }
  }
  /**
   * Exception carrying an explicitly supplied file and line.
   *
   * The location passed to the constructor is kept in private fields and used
   * by __toString(); getFile()/getLine() of the base Exception are untouched.
   */
  class XSErrorException extends XSException
  {
      private $_file;
      private $_line;

      /**
       * @param int $code error code
       * @param string $message error message
       * @param string $file file to report
       * @param int $line line to report
       * @param Exception $previous previous exception; only forwarded on PHP >= 5.3.0
       */
      public function __construct($code, $message, $file, $line, $previous = null)
      {
          $this->_file = $file;
          $this->_line = $line;
          // Exception::__construct() accepts the third argument from PHP 5.3.0 on.
          if (version_compare(PHP_VERSION, '5.3.0', '<'))
          {
              parent::__construct($message, $code);
          }
          else
          {
              parent::__construct($message, $code, $previous);
          }
      }

      /**
       * Renders "[XSErrorException] <relative file>(<line>): <message>(<code>)".
       *
       * @return string
       */
      public function __toString()
      {
          $where = $this->getRelPath($this->_file) . '(' . $this->_line . ')';
          return '[' . __CLASS__ . '] ' . $where . ': ' . $this->getMessage() . '(' . $this->getCode() . ')';
      }
  }
  /**
   * Base class mapping property access onto accessor methods.
   *
   * Reading $obj->foo calls getFoo(), writing calls setFoo($value). Method
   * lookup uses method_exists(), so property names are matched without regard
   * to case.
   */
  class XSComponent
  {
      /**
       * Reads a virtual property through its "get<Name>" method.
       *
       * @param string $name property name
       * @return mixed value returned by the getter
       * @throws XSException when no getter exists ("Write-only" if only a setter
       *                     exists, "Undefined" otherwise)
       */
      public function __get($name)
      {
          $accessor = 'get' . $name;
          if (!method_exists($this, $accessor))
          {
              $kind = method_exists($this, 'set' . $name) ? 'Write-only' : 'Undefined';
              throw new XSException($kind . ' property: ' . get_class($this) . '::$' . $name);
          }
          return $this->$accessor();
      }

      /**
       * Writes a virtual property through its "set<Name>" method.
       *
       * @param string $name property name
       * @param mixed $value value handed to the setter
       * @return mixed whatever the setter returns
       * @throws XSException when no setter exists ("Read-only" if only a getter
       *                     exists, "Undefined" otherwise)
       */
      public function __set($name, $value)
      {
          $mutator = 'set' . $name;
          if (!method_exists($this, $mutator))
          {
              $kind = method_exists($this, 'get' . $name) ? 'Read-only' : 'Undefined';
              throw new XSException($kind . ' property: ' . get_class($this) . '::$' . $name);
          }
          return $this->$mutator($value);
      }

      /**
       * Reports a virtual property as set whenever a getter for it exists.
       *
       * @param string $name property name
       * @return bool
       */
      public function __isset($name)
      {
          $accessor = 'get' . $name;
          return method_exists($this, $accessor);
      }

      /**
       * Unsetting a virtual property assigns null through its setter.
       *
       * @param string $name property name
       */
      public function __unset($name)
      {
          $this->__set($name, null);
      }
  }
  /**
   * Project entry object.
   *
   * Loads a project configuration (INI file or INI text), builds the field
   * scheme from it and lazily creates the index, search and SCWS server objects.
   */
  class XS extends XSComponent
  {
      private $_index;
      private $_search;
      private $_scws;
      private $_scheme, $_bindScheme;
      private $_config;
      private static $_lastXS;

      /**
       * @param string $file path of an INI file, a project name resolved to
       *                     XS_LIB_ROOT/../app/<name>.ini when that file exists,
       *                     or the INI text itself
       */
      public function __construct($file)
      {
          if (strlen($file) < 255 && !is_file($file))
          {
              $file2 = XS_LIB_ROOT . '/../app/' . $file . '.ini';
              if (is_file($file2))
                  $file = $file2;
          }
          $this->loadIniFile($file);
          self::$_lastXS = $this;
      }

      /**
       * Drops the references to the index and search objects.
       */
      public function __destruct()
      {
          $this->_index = null;
          $this->_search = null;
      }

      /**
       * @return XS the most recently constructed instance
       */
      public static function getLastXS()
      {
          return self::$_lastXS;
      }

      /**
       * @return XSFieldScheme the scheme currently in effect
       */
      public function getScheme()
      {
          return $this->_scheme;
      }

      /**
       * Replaces the current scheme after validating it.
       *
       * @param XSFieldScheme $fs new scheme; checkValid(true) is called on it
       */
      public function setScheme(XSFieldScheme $fs)
      {
          $fs->checkValid(true);
          $this->_scheme = $fs;
          if ($this->_search !== null)
              $this->_search->markResetScheme();
      }

      /**
       * Switches back to the scheme built from the project configuration.
       */
      public function restoreScheme()
      {
          if ($this->_scheme !== $this->_bindScheme)
          {
              $this->_scheme = $this->_bindScheme;
              if ($this->_search !== null)
                  $this->_search->markResetScheme(true);
          }
      }

      /**
       * @return string value of the 'project.name' setting
       */
      public function getName()
      {
          return $this->_config['project.name'];
      }

      /**
       * @param string $name new value of the 'project.name' setting
       */
      public function setName($name)
      {
          $this->_config['project.name'] = $name;
      }

      /**
       * @return string upper-cased 'project.default_charset', or 'UTF-8' when unset
       */
      public function getDefaultCharset()
      {
          return isset($this->_config['project.default_charset']) ?
              strtoupper($this->_config['project.default_charset']) : 'UTF-8';
      }

      /**
       * @param string $charset charset name; stored upper-cased
       */
      public function setDefaultCharset($charset)
      {
          $this->_config['project.default_charset'] = strtoupper($charset);
      }

      /**
       * Returns the index object, creating it on first use from the
       * 'server.index' setting (default 8383) with its timeout set to 0.
       *
       * @return XSIndex
       */
      public function getIndex()
      {
          if ($this->_index === null)
          {
              $conn = isset($this->_config['server.index']) ? $this->_config['server.index'] : 8383;
              $this->_index = new XSIndex($conn, $this);
              $this->_index->setTimeout(0);
          }
          return $this->_index;
      }

      /**
       * Returns the search object, creating it on first use from the
       * 'server.search' setting (default 8384) and giving it the default charset.
       *
       * @return XSSearch
       */
      public function getSearch()
      {
          if ($this->_search === null)
          {
              $conn = isset($this->_config['server.search']) ? $this->_config['server.search'] : 8384;
              $this->_search = new XSSearch($conn, $this);
              $this->_search->setCharset($this->getDefaultCharset());
          }
          return $this->_search;
      }

      /**
       * Returns a plain XSServer bound to the 'server.search' setting
       * (default 8384), creating it on first use.
       *
       * @return XSServer
       */
      public function getScwsServer()
      {
          if ($this->_scws === null)
          {
              $conn = isset($this->_config['server.search']) ? $this->_config['server.search'] : 8384;
              $this->_scws = new XSServer($conn, $this);
          }
          return $this->_scws;
      }

      /**
       * @return XSFieldMeta|false result of getFieldId() on the current scheme
       */
      public function getFieldId()
      {
          return $this->_scheme->getFieldId();
      }

      /**
       * @return XSFieldMeta|false result of getFieldTitle() on the current scheme
       */
      public function getFieldTitle()
      {
          return $this->_scheme->getFieldTitle();
      }

      /**
       * @return XSFieldMeta|false result of getFieldBody() on the current scheme
       */
      public function getFieldBody()
      {
          return $this->_scheme->getFieldBody();
      }

      /**
       * Looks up a field of the current scheme.
       *
       * @param mixed $name field name, or an integer value number
       * @param bool $throw forwarded to XSFieldScheme::getField()
       * @return XSFieldMeta|false
       */
      public function getField($name, $throw = true)
      {
          return $this->_scheme->getField($name, $throw);
      }

      /**
       * @return array all fields of the current scheme
       */
      public function getAllFields()
      {
          return $this->_scheme->getAllFields();
      }

      /**
       * Autoloader: requires XS_LIB_ROOT/<name>.class.php when that file exists.
       *
       * @param string $name class name
       */
      public static function autoload($name)
      {
          $file = XS_LIB_ROOT . '/' . $name . '.class.php';
          if (file_exists($file))
              require_once $file;
      }

      /**
       * Converts the character encoding of a string, or of every element of an
       * array (recursively; keys are left untouched).
       *
       * Only strings containing at least one byte in the range 0x81-0xFE are
       * converted; everything else is returned unchanged. mbstring is preferred,
       * iconv (with //TRANSLIT) is the fallback.
       *
       * @param mixed $data string or array to convert
       * @param string $to target charset
       * @param string $from source charset
       * @return mixed converted data
       * @throws XSException when neither mbstring nor iconv is available
       */
      public static function convert($data, $to, $from)
      {
          if ($to == $from)
              return $data;
          if (is_array($data))
          {
              foreach ($data as $key => $value)
              {
                  $data[$key] = self::convert($value, $to, $from);
              }
              return $data;
          }
          if (is_string($data) && preg_match('/[\x81-\xfe]/', $data))
          {
              if (function_exists('mb_convert_encoding'))
                  return mb_convert_encoding($data, $to, $from);
              else if (function_exists('iconv'))
                  return iconv($from, $to . '//TRANSLIT', $data);
              else
                  throw new XSException('Cann\'t find the mbstring or iconv extension to convert encoding');
          }
          return $data;
      }

      /**
       * Parses INI-style text into an array.
       *
       * Keys found before the first "[section]" become top-level entries; each
       * section becomes a nested array. Blank lines, lines without '=' and lines
       * whose first non-blank character is ';' or '#' are ignored. Values are
       * stripped of surrounding spaces, tabs and quote characters.
       *
       * @param string $data INI text
       * @return array parsed settings
       */
      private function parseIniData($data)
      {
          $ret = array();
          $cur = &$ret;
          foreach (explode("\n", $data) as $line)
          {
              // Trim before testing for comment markers so that indented
              // comments are skipped too instead of being stored as keys.
              $line = trim($line);
              if ($line === '' || $line[0] === ';' || $line[0] === '#')
                  continue;
              if ($line[0] === '[' && substr($line, -1, 1) === ']')
              {
                  $sec = substr($line, 1, -1);
                  $ret[$sec] = array();
                  $cur = &$ret[$sec];
                  continue;
              }
              if (($pos = strpos($line, '=')) === false)
                  continue;
              $key = trim(substr($line, 0, $pos));
              $value = trim(substr($line, $pos + 1), " '\t\"");
              $cur[$key] = $value;
          }
          return $ret;
      }

      /**
       * Loads the project configuration and builds the bound field scheme.
       *
       * When $file names an existing file, the parsed result is cached through
       * APC, XCache (non-CLI only) or eAccelerator if one of them is available,
       * and reused as long as the file is not newer than the cached copy.
       * Otherwise $file is treated as the INI text itself.
       *
       * @param string $file path of an INI file, or INI text
       * @throws XSException raised by XSFieldScheme when the scheme is invalid
       */
      private function loadIniFile($file)
      {
          $cache = false;
          $cache_write = '';
          if (strlen($file) < 255 && file_exists($file))
          {
              $cache_key = md5(__CLASS__ . '::ini::' . realpath($file));
              if (function_exists('apc_fetch'))
              {
                  $cache = apc_fetch($cache_key);
                  $cache_write = 'apc_store';
              }
              else if (function_exists('xcache_get') && php_sapi_name() !== 'cli')
              {
                  $cache = xcache_get($cache_key);
                  $cache_write = 'xcache_set';
              }
              else if (function_exists('eaccelerator_get'))
              {
                  $cache = eaccelerator_get($cache_key);
                  $cache_write = 'eaccelerator_put';
              }
              if ($cache && filemtime($file) <= $cache['mtime'])
              {
                  $this->_scheme = $this->_bindScheme = unserialize($cache['scheme']);
                  $this->_config = $cache['config'];
                  return;
              }
              $data = file_get_contents($file);
          }
          else
          {
              // Inline configuration: derive a pseudo file name from its hash,
              // used below as the fallback project name.
              $data = $file;
              $file = substr(md5($file), 8, 8) . '.ini';
          }

          $this->_config = $this->parseIniData($data);
          $scheme = new XSFieldScheme;
          foreach ($this->_config as $key => $value)
          {
              // Every section describes one field.
              if (is_array($value))
                  $scheme->addField($key, $value);
          }
          $scheme->checkValid(true);
          if (!isset($this->_config['project.name']))
              $this->_config['project.name'] = basename($file, '.ini');
          $this->_scheme = $this->_bindScheme = $scheme;

          if ($cache_write != '')
          {
              // Build a fresh array: after a cache miss $cache holds a non-array
              // value, and writing an offset into false is deprecated since PHP 8.1.
              $cache = array(
                  'mtime' => filemtime($file),
                  'scheme' => serialize($this->_scheme),
                  'config' => $this->_config,
              );
              call_user_func($cache_write, $cache_key, $cache);
          }
      }
  }
  /**
   * Error handler that turns PHP errors into XSErrorException.
   *
   * Only error levels contained in the current 'error_reporting' ini value are
   * converted; for any other level the function returns without a value.
   *
   * @param int $errno error level
   * @param string $error error message
   * @param string $file file in which the error was raised
   * @param int $line line on which the error was raised
   * @throws XSErrorException
   */
  function xs_error_handler($errno, $error, $file, $line)
  {
      $reported = $errno & ini_get('error_reporting');
      if (!$reported)
          return;
      throw new XSErrorException($errno, $error, $file, $line);
  }

  // Registered as soon as this file is loaded.
  set_error_handler('xs_error_handler');
  /**
   * Document object used both for indexing and for search results.
   *
   * Field values are reachable as properties ($doc->title), array offsets
   * ($doc['title']) or through f(). A document built from a packed result
   * header additionally exposes the header entries (docid, rank, ccount,
   * percent, weight) as methods and rejects writes through magic properties.
   */
  class XSDocument implements ArrayAccess, IteratorAggregate
  {
      private $_data;
      private $_terms, $_texts;
      private $_charset, $_meta;
      // Byte length and unpack() format of a packed result header.
      private static $_resSize = 20;
      private static $_resFormat = 'Idocid/Irank/Iccount/ipercent/fweight';

      /**
       * @param mixed $p initial field array, a packed result header (a string of
       *                 exactly 20 bytes), or a charset name (any other string)
       * @param string $d charset name; ignored when $p was already taken as charset
       */
      public function __construct($p = null, $d = null)
      {
          $this->_data = array();
          if (is_array($p))
              $this->_data = $p;
          else if (is_string($p))
          {
              if (strlen($p) !== self::$_resSize)
              {
                  $this->setCharset($p);
                  return;
              }
              $this->_meta = unpack(self::$_resFormat, $p);
          }
          if ($d !== null && is_string($d))
              $this->setCharset($d);
      }

      /**
       * Reads a field value, converted according to the document charset.
       *
       * @param string $name field name
       * @return mixed field value, or null when the field is not set
       */
      public function __get($name)
      {
          if (!isset($this->_data[$name]))
              return null;
          return $this->autoConvert($this->_data[$name]);
      }

      /**
       * Makes isset($doc->field) and empty($doc->field) reflect the stored
       * fields; without it PHP treats every magic property as unset.
       *
       * @param string $name field name
       * @return bool whether the field is set and not null
       */
      public function __isset($name)
      {
          return isset($this->_data[$name]);
      }

      /**
       * Writes a field value; not allowed on result documents.
       *
       * @param string $name field name
       * @param mixed $value field value, null removes the field
       * @throws XSException when the document was built from a result header
       */
      public function __set($name, $value)
      {
          if ($this->_meta !== null)
              throw new XSException('Magick property of result document is read-only');
          $this->setField($name, $value);
      }

      /**
       * Exposes result header entries as methods, e.g. docid() or percent().
       *
       * @param string $name method name, matched case-insensitively
       * @param array $args ignored
       * @return mixed the header entry
       * @throws XSException when no such header entry exists
       */
      public function __call($name, $args)
      {
          if ($this->_meta !== null)
          {
              $name = strtolower($name);
              if (isset($this->_meta[$name]))
                  return $this->_meta[$name];
          }
          throw new XSException('Call to undefined method `' . get_class($this) . '::' . $name . '()\'');
      }

      /**
       * @return string|null charset of the document, null when not set
       */
      public function getCharset()
      {
          return $this->_charset;
      }

      /**
       * @param string $charset charset name; stored upper-cased, 'UTF8' becomes 'UTF-8'
       */
      public function setCharset($charset)
      {
          $this->_charset = strtoupper($charset);
          if ($this->_charset == 'UTF8')
              $this->_charset = 'UTF-8';
      }

      /**
       * Merges field values into the document.
       *
       * @param array|null $data fields to merge; null clears all fields
       */
      public function setFields($data)
      {
          if ($data === null)
              $this->_data = array();
          else
              $this->_data = array_merge($this->_data, $data);
      }

      /**
       * Sets or removes a single field.
       *
       * @param string $name field name
       * @param mixed $value field value, null removes the field
       */
      public function setField($name, $value)
      {
          if ($value === null)
              unset($this->_data[$name]);
          else
              $this->_data[$name] = $value;
      }

      /**
       * Reads a field value by name.
       *
       * @param mixed $name field name or any value convertible to string
       * @return mixed field value, or null when the field is not set
       */
      public function f($name)
      {
          return $this->__get(strval($name));
      }

      /**
       * Returns the extra terms added for a field.
       *
       * @param mixed $field field name
       * @return array|null map of (charset-converted) term => weight, or null
       */
      public function getAddTerms($field)
      {
          $field = strval($field);
          if ($this->_terms === null || !isset($this->_terms[$field]))
              return null;
          $terms = array();
          foreach ($this->_terms[$field] as $term => $weight)
          {
              $term = $this->autoConvert($term);
              $terms[$term] = $weight;
          }
          return $terms;
      }

      /**
       * Returns the extra index text added for a field.
       *
       * @param mixed $field field name
       * @return string|null charset-converted text, or null when none was added
       */
      public function getAddIndex($field)
      {
          $field = strval($field);
          if ($this->_texts === null || !isset($this->_texts[$field]))
              return null;
          return $this->autoConvert($this->_texts[$field]);
      }

      /**
       * Adds an extra term for a field; repeated terms accumulate their weights.
       *
       * @param mixed $field field name
       * @param string $term term
       * @param int $weight weight to add
       */
      public function addTerm($field, $term, $weight = 1)
      {
          $field = strval($field);
          if (!is_array($this->_terms))
              $this->_terms = array();
          if (!isset($this->_terms[$field]))
              $this->_terms[$field] = array($term => $weight);
          else if (!isset($this->_terms[$field][$term]))
              $this->_terms[$field][$term] = $weight;
          else
              $this->_terms[$field][$term] += $weight;
      }

      /**
       * Adds extra index text for a field; repeated calls are joined by "\n".
       *
       * @param mixed $field field name
       * @param string $text text
       */
      public function addIndex($field, $text)
      {
          $field = strval($field);
          if (!is_array($this->_texts))
              $this->_texts = array();
          if (!isset($this->_texts[$field]))
              $this->_texts[$field] = strval($text);
          else
              $this->_texts[$field] .= "\n" . strval($text);
      }

      /**
       * IteratorAggregate: iterates over the fields, charset-converted.
       *
       * @return ArrayIterator
       */
      #[\ReturnTypeWillChange]
      public function getIterator()
      {
          if ($this->_charset !== null && $this->_charset !== 'UTF-8')
          {
              $from = $this->_meta === null ? $this->_charset : 'UTF-8';
              $to = $this->_meta === null ? 'UTF-8' : $this->_charset;
              return new ArrayIterator(XS::convert($this->_data, $to, $from));
          }
          return new ArrayIterator($this->_data);
      }

      /**
       * ArrayAccess: whether a field is set.
       *
       * @param string $name field name
       * @return bool
       */
      #[\ReturnTypeWillChange]
      public function offsetExists($name)
      {
          return isset($this->_data[$name]);
      }

      /**
       * ArrayAccess: reads a field, same as __get().
       *
       * @param string $name field name
       * @return mixed
       */
      #[\ReturnTypeWillChange]
      public function offsetGet($name)
      {
          return $this->__get($name);
      }

      /**
       * ArrayAccess: writes a field, same as __set(); $doc[] = ... is ignored.
       *
       * @param string|null $name field name
       * @param mixed $value field value
       */
      #[\ReturnTypeWillChange]
      public function offsetSet($name, $value)
      {
          if (!is_null($name))
              $this->__set(strval($name), $value);
      }

      /**
       * ArrayAccess: removes a field.
       *
       * @param string $name field name
       */
      #[\ReturnTypeWillChange]
      public function offsetUnset($name)
      {
          unset($this->_data[$name]);
      }

      /**
       * Hook run before the document is submitted to an index; adopts the
       * project default charset when the document has none.
       *
       * @param XSIndex $index target index
       * @return bool always true here; update() skips the submission when a
       *              hook returns false
       */
      public function beforeSubmit(XSIndex $index)
      {
          if ($this->_charset === null)
              $this->_charset = $index->xs->getDefaultCharset();
          return true;
      }

      /**
       * Hook for subclasses; does nothing here.
       *
       * @param XSIndex $index target index
       */
      public function afterSubmit($index)
      {
      }

      /**
       * Converts a string between the document charset and UTF-8.
       *
       * Ordinary documents are converted from their charset to UTF-8, result
       * documents from UTF-8 to their charset. Non-strings, strings without
       * any byte in 0x81-0xFE and documents using UTF-8 (or no charset) are
       * returned unchanged.
       *
       * @param mixed $value value to convert
       * @return mixed
       */
      private function autoConvert($value)
      {
          if ($this->_charset === null || $this->_charset == 'UTF-8'
              || !is_string($value) || !preg_match('/[\x81-\xfe]/', $value))
          {
              return $value;
          }
          $from = $this->_meta === null ? $this->_charset : 'UTF-8';
          $to = $this->_meta === null ? 'UTF-8' : $this->_charset;
          return XS::convert($value, $to, $from);
      }
  }
  /**
   * Ordered collection of field definitions.
   *
   * Keeps the fields by name, remembers which field holds each special type
   * (ID, TITLE, BODY) and maps value numbers (vno) to field names.
   */
  class XSFieldScheme implements IteratorAggregate
  {
      // Value number assigned to the BODY field.
      const MIXED_VNO = 255;

      private $_fields = array();
      private $_typeMap = array();
      private $_vnoMap = array();
      private static $_logger;

      /**
       * @return string toConfig() output of every field, each followed by a newline
       */
      public function __toString()
      {
          $str = '';
          foreach ($this->_fields as $field)
          {
              $str .= $field->toConfig() . "\n";
          }
          return $str;
      }

      /**
       * @return XSFieldMeta|false the field of type ID, false when there is none
       */
      public function getFieldId()
      {
          if (isset($this->_typeMap[XSFieldMeta::TYPE_ID]))
          {
              $name = $this->_typeMap[XSFieldMeta::TYPE_ID];
              return $this->_fields[$name];
          }
          return false;
      }

      /**
       * @return XSFieldMeta|false the field of type TITLE, false when there is none
       */
      public function getFieldTitle()
      {
          if (isset($this->_typeMap[XSFieldMeta::TYPE_TITLE]))
          {
              $name = $this->_typeMap[XSFieldMeta::TYPE_TITLE];
              return $this->_fields[$name];
          }
          return false;
      }

      /**
       * @return XSFieldMeta|false the field of type BODY, false when there is none
       */
      public function getFieldBody()
      {
          if (isset($this->_typeMap[XSFieldMeta::TYPE_BODY]))
          {
              $name = $this->_typeMap[XSFieldMeta::TYPE_BODY];
              return $this->_fields[$name];
          }
          return false;
      }

      /**
       * Looks up a field by name or by value number.
       *
       * @param mixed $name field name, or an integer taken as value number
       * @param bool $throw when exactly true, a missing field raises an exception
       * @return XSFieldMeta|false the field, or false when missing and $throw is not true
       * @throws XSException
       */
      public function getField($name, $throw = true)
      {
          if (is_int($name))
          {
              if (!isset($this->_vnoMap[$name]))
              {
                  if ($throw === true)
                      throw new XSException('Not exists field with vno: `' . $name . '\'');
                  return false;
              }
              $name = $this->_vnoMap[$name];
          }
          if (!isset($this->_fields[$name]))
          {
              if ($throw === true)
                  throw new XSException('Not exists field with name: `' . $name . '\'');
              return false;
          }
          return $this->_fields[$name];
      }

      /**
       * @return array all fields, keyed by name
       */
      public function getAllFields()
      {
          return $this->_fields;
      }

      /**
       * @return array map of value number => field name
       */
      public function getVnoMap()
      {
          return $this->_vnoMap;
      }

      /**
       * Adds a field to the scheme.
       *
       * The field receives its value number here: MIXED_VNO for the BODY field,
       * otherwise the number of entries already in the vno map. The ID field is
       * always moved to the front of the field list.
       *
       * @param mixed $field an XSFieldMeta instance, or a field name
       * @param array $config field settings, used when $field is a name
       * @throws XSException on a duplicated field name or a second ID/TITLE/BODY field
       */
      public function addField($field, $config = null)
      {
          if (!$field instanceof XSFieldMeta)
              $field = new XSFieldMeta($field, $config);
          if (isset($this->_fields[$field->name]))
              throw new XSException('Duplicated field name: `' . $field->name . '\'');
          if ($field->isSpeical())
          {
              if (isset($this->_typeMap[$field->type]))
              {
                  $prev = $this->_typeMap[$field->type];
                  // Name the type from the field itself: $config is null when a
                  // ready-made XSFieldMeta instance was passed in.
                  $labels = array(
                      XSFieldMeta::TYPE_ID => 'ID',
                      XSFieldMeta::TYPE_TITLE => 'TITLE',
                      XSFieldMeta::TYPE_BODY => 'BODY',
                  );
                  $label = isset($labels[$field->type]) ? $labels[$field->type] : strval($field->type);
                  throw new XSException('Duplicated ' . $label . ' field: `' . $field->name . '\' and `' . $prev . '\'');
              }
              $this->_typeMap[$field->type] = $field->name;
          }
          $field->vno = ($field->type == XSFieldMeta::TYPE_BODY) ? self::MIXED_VNO : count($this->_vnoMap);
          $this->_vnoMap[$field->vno] = $field->name;
          if ($field->type == XSFieldMeta::TYPE_ID)
          {
              // Array union keeps every key as is; array_merge() would renumber
              // fields whose names are numeric strings.
              $this->_fields = array($field->name => $field) + $this->_fields;
          }
          else
              $this->_fields[$field->name] = $field;
      }

      /**
       * Checks that the scheme contains a field of type ID.
       *
       * @param bool $throw whether to raise an exception instead of returning false
       * @return bool
       * @throws XSException
       */
      public function checkValid($throw = false)
      {
          if (!isset($this->_typeMap[XSFieldMeta::TYPE_ID]))
          {
              if ($throw)
                  throw new XSException('Missing field of type ID');
              return false;
          }
          return true;
      }

      /**
       * IteratorAggregate: iterates over the fields, keyed by name.
       *
       * @return ArrayIterator
       */
      #[\ReturnTypeWillChange]
      public function getIterator()
      {
          return new ArrayIterator($this->_fields);
      }

      /**
       * Returns the shared scheme with the fields id, pinyin, partial, total,
       * lastnum, currnum, currtag and body, building it on first use.
       *
       * @return XSFieldScheme
       */
      public static function logger()
      {
          if (self::$_logger === null)
          {
              $scheme = new self;
              $scheme->addField('id', array('type' => 'id'));
              $scheme->addField('pinyin');
              $scheme->addField('partial');
              $scheme->addField('total', array('type' => 'numeric', 'index' => 'self'));
              $scheme->addField('lastnum', array('type' => 'numeric', 'index' => 'self'));
              $scheme->addField('currnum', array('type' => 'numeric', 'index' => 'self'));
              $scheme->addField('currtag', array('type' => 'string'));
              $scheme->addField('body', array('type' => 'body'));
              self::$_logger = $scheme;
          }
          return self::$_logger;
      }
  }
  /**
   * Definition of a single field: name, type, value number, cut length,
   * weight, index flags and tokenizer.
   */
  class XSFieldMeta
  {
      // Upper bound (bit mask) applied to configured weights.
      const MAX_WDF = 0x3f;

      // Field types; ID, TITLE and BODY are the "special" ones.
      const TYPE_STRING = 0;
      const TYPE_NUMERIC = 1;
      const TYPE_DATE = 2;
      const TYPE_ID = 10;
      const TYPE_TITLE = 11;
      const TYPE_BODY = 12;

      // Flag bits: FLAG_INDEX_BOTH is FLAG_INDEX_SELF | FLAG_INDEX_MIXED.
      const FLAG_INDEX_SELF = 0x01;
      const FLAG_INDEX_MIXED = 0x02;
      const FLAG_INDEX_BOTH = 0x03;
      const FLAG_WITH_POSITION = 0x10;

      public $name;
      public $cutlen = 0;
      public $weight = 1;
      public $type = 0;
      public $vno = 0;
      private $tokenizer = XSTokenizer::DFL;
      private $flag = 0;
      // Tokenizer instances shared by all fields, keyed by tokenizer setting.
      private static $_tokenizers = array();

      /**
       * @param mixed $name field name, converted to string
       * @param array $config field settings, see fromConfig()
       */
      public function __construct($name, $config = null)
      {
          $this->name = strval($name);
          if (is_array($config))
          {
              $this->fromConfig($config);
          }
      }

      /**
       * @return string the field name
       */
      public function __toString()
      {
          return $this->name;
      }

      /**
       * Normalises a value for this field.
       *
       * Only DATE fields are affected: a value that is not an 8-character
       * numeric string is formatted as 'Ymd', taking numeric values as
       * timestamps and anything else through strtotime().
       *
       * @param mixed $value raw value
       * @return mixed normalised value
       */
      public function val($value)
      {
          if ($this->type != self::TYPE_DATE)
              return $value;
          if (is_numeric($value) && strlen($value) == 8)
              return $value;
          $stamp = is_numeric($value) ? $value : strtotime($value);
          return date('Ymd', $stamp);
      }

      /**
       * @return bool whether the FLAG_WITH_POSITION bit is set
       */
      public function withPos()
      {
          return ($this->flag & self::FLAG_WITH_POSITION) !== 0;
      }

      /**
       * @return bool true when the field has no index at all or uses a
       *              non-default tokenizer
       */
      public function isBoolIndex()
      {
          if (!$this->hasIndex())
              return true;
          return $this->tokenizer !== XSTokenizer::DFL;
      }

      /**
       * @return bool whether the type is NUMERIC
       */
      public function isNumeric()
      {
          return $this->type == self::TYPE_NUMERIC;
      }

      /**
       * @return bool whether the type is ID, TITLE or BODY
       */
      public function isSpeical()
      {
          return in_array($this->type, array(self::TYPE_ID, self::TYPE_TITLE, self::TYPE_BODY));
      }

      /**
       * @return bool whether any of the two index bits is set
       */
      public function hasIndex()
      {
          return ($this->flag & self::FLAG_INDEX_BOTH) !== 0;
      }

      /**
       * @return bool whether the FLAG_INDEX_MIXED bit is set
       */
      public function hasIndexMixed()
      {
          return ($this->flag & self::FLAG_INDEX_MIXED) !== 0;
      }

      /**
       * @return bool whether the FLAG_INDEX_SELF bit is set
       */
      public function hasIndexSelf()
      {
          return ($this->flag & self::FLAG_INDEX_SELF) !== 0;
      }

      /**
       * @return bool whether a tokenizer other than the default one is configured
       */
      public function hasCustomTokenizer()
      {
          return !($this->tokenizer === XSTokenizer::DFL);
      }

      /**
       * Returns the tokenizer object for this field's tokenizer setting.
       *
       * A setting "name" maps to class XSTokenizerName; "name(arg)" passes
       * arg to the constructor. Instances are created once per setting and
       * shared between fields.
       *
       * @return XSTokenizer
       * @throws XSException when the class does not exist or does not
       *                     implement XSTokenizer
       */
      public function getCustomTokenizer()
      {
          $spec = $this->tokenizer;
          if (isset(self::$_tokenizers[$spec]))
              return self::$_tokenizers[$spec];

          // Plain form first; overridden below when the setting carries "(arg)".
          $name = 'XSTokenizer' . ucfirst($spec);
          $arg = null;
          $open = strpos($spec, '(');
          if ($open !== false)
          {
              $close = strrpos($spec, ')', $open + 1);
              if ($close)
              {
                  $name = 'XSTokenizer' . ucfirst(trim(substr($spec, 0, $open)));
                  $arg = substr($spec, $open + 1, $close - $open - 1);
              }
          }

          if (!class_exists($name))
              throw new XSException('Undefined custom tokenizer `' . $this->tokenizer . '\' for field `' . $this->name . '\'');
          if ($arg === null)
              $obj = new $name;
          else
              $obj = new $name($arg);
          if (!$obj instanceof XSTokenizer)
              throw new XSException($name . ' for field `' . $this->name . '\' dose not implement the interface: XSTokenizer');

          self::$_tokenizers[$spec] = $obj;
          return $obj;
      }

      /**
       * Serialises the field as an INI section.
       *
       * Settings equal to the defaults implied by the field type are left out
       * (for example cutlen 300 on BODY, weight 5 on TITLE).
       *
       * @return string the section text, each line terminated by "\n"
       */
      public function toConfig()
      {
          $lines = array('[' . $this->name . ']');

          // STRING is the default type and is not written out.
          $typeNames = array(
              self::TYPE_NUMERIC => 'numeric',
              self::TYPE_DATE => 'date',
              self::TYPE_ID => 'id',
              self::TYPE_TITLE => 'title',
              self::TYPE_BODY => 'body',
          );
          if (is_int($this->type) && isset($typeNames[$this->type]))
              $lines[] = 'type = ' . $typeNames[$this->type];

          $index = $this->flag & self::FLAG_INDEX_BOTH;
          if ($this->type !== self::TYPE_BODY && $index !== 0)
          {
              if ($index === self::FLAG_INDEX_BOTH)
              {
                  if ($this->type !== self::TYPE_TITLE)
                      $lines[] = 'index = both';
              }
              else if ($index === self::FLAG_INDEX_MIXED)
              {
                  $lines[] = 'index = mixed';
              }
              else if ($this->type != self::TYPE_ID)
              {
                  $lines[] = 'index = self';
              }
          }

          if ($this->type !== self::TYPE_ID && $this->tokenizer !== XSTokenizer::DFL)
              $lines[] = 'tokenizer = ' . $this->tokenizer;

          $bodyDefaultCut = ($this->cutlen === 300 && $this->type === self::TYPE_BODY);
          if ($this->cutlen > 0 && !$bodyDefaultCut)
              $lines[] = 'cutlen = ' . $this->cutlen;

          $titleDefaultWeight = ($this->weight === 5 && $this->type === self::TYPE_TITLE);
          if ($this->weight !== 1 && !$titleDefaultWeight)
              $lines[] = 'weight = ' . $this->weight;

          // Position data is the default for TITLE and BODY only, so only the
          // deviation from that default is written.
          $posByDefault = ($this->type === self::TYPE_BODY || $this->type === self::TYPE_TITLE);
          $hasPos = ($this->flag & self::FLAG_WITH_POSITION) !== 0;
          if ($hasPos && !$posByDefault)
              $lines[] = 'phrase = yes';
          else if (!$hasPos && $posByDefault)
              $lines[] = 'phrase = no';

          return implode("\n", $lines) . "\n";
      }

      /**
       * Applies settings from a configuration array.
       *
       * Recognised keys: type, index, cutlen, weight, phrase, tokenizer.
       * Unknown type or index names are ignored. The BODY type ignores
       * 'index' and 'weight'; the ID type ignores 'tokenizer' and always
       * keeps the FLAG_INDEX_SELF bit.
       *
       * @param array $config field settings
       */
      public function fromConfig($config)
      {
          if (isset($config['type']))
          {
              $predef = 'self::TYPE_' . strtoupper($config['type']);
              if (defined($predef))
              {
                  $this->type = constant($predef);
                  // Defaults implied by the special types.
                  switch ($this->type)
                  {
                      case self::TYPE_ID:
                          $this->flag = self::FLAG_INDEX_SELF;
                          $this->tokenizer = 'full';
                          break;
                      case self::TYPE_TITLE:
                          $this->flag = self::FLAG_INDEX_BOTH | self::FLAG_WITH_POSITION;
                          $this->weight = 5;
                          break;
                      case self::TYPE_BODY:
                          $this->vno = XSFieldScheme::MIXED_VNO;
                          $this->flag = self::FLAG_INDEX_SELF | self::FLAG_WITH_POSITION;
                          $this->cutlen = 300;
                          break;
                  }
              }
          }

          if (isset($config['index']) && $this->type != self::TYPE_BODY)
          {
              $predef = 'self::FLAG_INDEX_' . strtoupper($config['index']);
              if (defined($predef))
              {
                  // Replace both index bits with the configured combination.
                  $this->flag = ($this->flag & ~ self::FLAG_INDEX_BOTH) | constant($predef);
              }
              if ($this->type == self::TYPE_ID)
                  $this->flag |= self::FLAG_INDEX_SELF;
          }

          if (isset($config['cutlen']))
              $this->cutlen = intval($config['cutlen']);

          if (isset($config['weight']) && $this->type != self::TYPE_BODY)
              $this->weight = intval($config['weight']) & self::MAX_WDF;

          if (isset($config['phrase']))
          {
              if (strcasecmp($config['phrase'], 'yes') === 0)
                  $this->flag |= self::FLAG_WITH_POSITION;
              else if (strcasecmp($config['phrase'], 'no') === 0)
                  $this->flag &= ~ self::FLAG_WITH_POSITION;
          }

          if (isset($config['tokenizer']) && $this->type != self::TYPE_ID
              && $config['tokenizer'] != 'default')
          {
              $this->tokenizer = $config['tokenizer'];
          }
      }
  }
  997. class XSIndex extends XSServer
  998. {
  /** @var string commands collected while buffering is enabled */
  private $_buf = '';

  /** @var int buffer threshold in bytes; 0 means buffering is off */
  private $_bufSize = 0;

  /** @var bool true between beginRebuild() and endRebuild() */
  private $_rebuild = false;
  /**
   * Send the CMD_INDEX_CLEAN_DB command and expect CMD_OK_DB_CLEAN.
   *
   * @return XSIndex this object, for chaining
   */
  public function clean()
  {
      $this->execCommand(CMD_INDEX_CLEAN_DB, CMD_OK_DB_CLEAN);

      return $this;
  }
  /**
   * Add a document; shorthand for update() with the add flag set.
   *
   * @param XSDocument $doc document to submit
   * @return XSIndex this object, for chaining
   */
  public function add(XSDocument $doc)
  {
      $asNew = true;

      return $this->update($doc, $asNew);
  }
  /**
   * Submit a document to the index.
   *
   * Builds the full command sequence for the document (request header,
   * per-field index/value/term commands, submit) and either appends it to
   * the open buffer or executes it command by command.
   *
   * @param XSDocument $doc document to submit
   * @param bool $add true to issue a plain ADD request; otherwise an UPDATE
   *        request keyed by the primary key value is issued
   * @return XSIndex this object, for chaining
   * @throws XSException when the primary key field has no value
   */
  public function update(XSDocument $doc, $add = false)
  {
      // The document may veto its own submission.
      if ($doc->beforeSubmit($this) === false) {
          return $this;
      }

      // A primary key value is mandatory.
      $fid = $this->xs->getFieldId();
      $key = $doc->f($fid);
      if ($key === null || $key === '') {
          throw new XSException('Missing value of primarky key (FIELD:' . $fid . ')');
      }

      // Request header: ADD by default, UPDATE carries the key and its vno.
      $head = new XSCommand(CMD_INDEX_REQUEST, CMD_INDEX_REQUEST_ADD);
      if ($add !== true) {
          $head->arg1 = CMD_INDEX_REQUEST_UPDATE;
          $head->arg2 = $fid->vno;
          $head->buf = $key;
      }
      $cmds = array($head);

      foreach ($this->xs->getAllFields() as $field) /* @var $field XSFieldMeta */ {
          // 1) The field value itself.
          $value = $doc->f($field);
          if ($value !== null) {
              $varg = $field->isNumeric() ? CMD_VALUE_FLAG_NUMERIC : 0;
              $value = $field->val($value);
              if ($field->hasCustomTokenizer()) {
                  if ($field->hasIndex()) {
                      $terms = $field->getCustomTokenizer()->getTokens($value, $doc);
                      if ($field->hasIndexSelf()) {
                          foreach ($terms as $term) {
                              $cmds[] = new XSCommand(CMD_DOC_TERM, 1, $field->vno, strtolower($term));
                          }
                      }
                      if ($field->hasIndexMixed()) {
                          $mixedText = implode(' ', $terms);
                          $cmds[] = new XSCommand(CMD_DOC_INDEX, $field->weight, XSFieldScheme::MIXED_VNO, $mixedText);
                      }
                  }
                  $cmds[] = new XSCommand(CMD_DOC_VALUE, $varg, $field->vno, $value);
              } else {
                  $wdf = $field->weight | ($field->withPos() ? CMD_INDEX_FLAG_WITHPOS : 0);
                  if ($field->hasIndexMixed()) {
                      $cmds[] = new XSCommand(CMD_DOC_INDEX, $wdf, XSFieldScheme::MIXED_VNO, $value);
                  }
                  if ($field->hasIndexSelf()) {
                      $wdf |= ($field->isNumeric() ? 0 : CMD_INDEX_FLAG_SAVEVALUE);
                      $cmds[] = new XSCommand(CMD_DOC_INDEX, $wdf, $field->vno, $value);
                  }
                  // A separate value command is needed when the index command
                  // did not save the value (no self index, or numeric field).
                  if (!$field->hasIndexSelf() || $field->isNumeric()) {
                      $cmds[] = new XSCommand(CMD_DOC_VALUE, $varg, $field->vno, $value);
                  }
              }
          }

          // 2) Extra terms attached to the field; a weight above MAX_WDF is
          //    split over several term commands.
          $terms = $doc->getAddTerms($field);
          if ($terms !== null) {
              $wdf1 = $field->isBoolIndex() ? 0 : CMD_INDEX_FLAG_CHECKSTEM;
              foreach ($terms as $term => $wdf) {
                  $term = strtolower($term);
                  $wdf2 = $field->isBoolIndex() ? 1 : $wdf * $field->weight;
                  for (; $wdf2 > XSFieldMeta::MAX_WDF; $wdf2 -= XSFieldMeta::MAX_WDF) {
                      $cmds[] = new XSCommand(CMD_DOC_TERM, $wdf1 | XSFieldMeta::MAX_WDF, $field->vno, $term);
                  }
                  $cmds[] = new XSCommand(CMD_DOC_TERM, $wdf1 | $wdf2, $field->vno, $term);
              }
          }

          // 3) Extra index text attached to the field.
          $text = $doc->getAddIndex($field);
          if ($text !== null) {
              if ($field->hasCustomTokenizer()) {
                  $wdf = $field->isBoolIndex() ? 1 : ($field->weight | CMD_INDEX_FLAG_CHECKSTEM);
                  $terms = $field->getCustomTokenizer()->getTokens($text, $doc);
                  foreach ($terms as $term) {
                      $cmds[] = new XSCommand(CMD_DOC_TERM, $wdf, $field->vno, strtolower($term));
                  }
              } else {
                  $wdf = $field->weight | ($field->withPos() ? CMD_INDEX_FLAG_WITHPOS : 0);
                  $cmds[] = new XSCommand(CMD_DOC_INDEX, $wdf, $field->vno, $text);
              }
          }
      }

      $cmds[] = new XSCommand(CMD_INDEX_SUBMIT);
      if ($this->_bufSize > 0) {
          $this->appendBuffer(implode('', $cmds));
      } else {
          // Only the final (submit) command expects a "request finished" reply.
          $submit = array_pop($cmds);
          foreach ($cmds as $cmd) {
              $this->execCommand($cmd);
          }
          $this->execCommand($submit, CMD_OK_RQST_FINISHED);
      }

      $doc->afterSubmit($this);

      return $this;
  }
  /**
   * Remove documents matching one or more terms.
   *
   * @param string|array $term a term or a list of terms (duplicates removed)
   * @param string|null $field field name; null selects the primary key field
   * @return XSIndex this object, for chaining
   */
  public function del($term, $field = null)
  {
      if ($field === null) {
          $field = $this->xs->getFieldId();
      } else {
          $field = $this->xs->getField($field);
      }

      $terms = is_array($term) ? array_unique($term) : array($term);
      $terms = XS::convert($terms, 'UTF-8', $this->xs->getDefaultCharset());

      $cmds = array();
      foreach ($terms as $one) {
          $cmds[] = new XSCommand(CMD_INDEX_REMOVE, 0, $field->vno, strtolower($one));
      }

      if ($this->_bufSize > 0) {
          $this->appendBuffer(implode('', $cmds));
      } elseif (count($cmds) == 1) {
          $this->execCommand($cmds[0], CMD_OK_RQST_FINISHED);
      } else {
          // Several removals travel together as one exdata packet.
          $packet = array('cmd' => CMD_INDEX_EXDATA, 'buf' => implode('', $cmds));
          $this->execCommand($packet, CMD_OK_RQST_FINISHED);
      }

      return $this;
  }
  /**
   * Send a pre-built block of index commands to the server.
   *
   * @param string $data the raw command data, or (when $check_file is true
   *        and the string is shorter than 255 bytes) the path of a file
   *        holding it
   * @param bool $check_file whether $data may be treated as a file path
   * @return XSIndex this object, for chaining
   * @throws XSException when the file cannot be read or the data does not
   *         start with an accepted command byte
   */
  public function addExdata($data, $check_file = true)
  {
      if (strlen($data) < 255 && $check_file && file_exists($data)) {
          $data = file_get_contents($data);
          if ($data === false) {
              throw new XSException('Failed to read exdata from file');
          }
      }

      // The first byte must be one of the commands allowed to open exdata.
      $first = ord(substr($data, 0, 1));
      $accepted = array(
          CMD_IMPORT_HEADER,
          CMD_INDEX_REQUEST,
          CMD_INDEX_SYNONYMS,
          CMD_INDEX_REMOVE,
          CMD_INDEX_EXDATA,
      );
      if (!in_array($first, $accepted)) {
          throw new XSException('Invalid start command of exdata (CMD:' . $first . ')');
      }

      $this->execCommand(array('cmd' => CMD_INDEX_EXDATA, 'buf' => $data), CMD_OK_RQST_FINISHED);

      return $this;
  }
  /**
   * Register a synonym for a word; does nothing if either string is empty.
   *
   * @param string $raw the original word
   * @param string $synonym its synonym
   * @return XSIndex this object, for chaining
   */
  public function addSynonym($raw, $synonym)
  {
      $raw = strval($raw);
      $synonym = strval($synonym);
      if ($raw === '' || $synonym === '') {
          return $this;
      }

      $cmd = new XSCommand(CMD_INDEX_SYNONYMS, CMD_INDEX_SYNONYMS_ADD, 0, $raw, $synonym);
      if ($this->_bufSize > 0) {
          $this->appendBuffer(strval($cmd));
      } else {
          $this->execCommand($cmd, CMD_OK_RQST_FINISHED);
      }

      return $this;
  }
  /**
   * Remove a synonym of a word; does nothing if the word is empty.
   *
   * @param string $raw the original word
   * @param string|null $synonym the synonym to drop; null is sent as an
   *        empty string
   * @return XSIndex this object, for chaining
   */
  public function delSynonym($raw, $synonym = null)
  {
      $raw = strval($raw);
      if ($synonym === null) {
          $synonym = '';
      } else {
          $synonym = strval($synonym);
      }
      if ($raw === '') {
          return $this;
      }

      $cmd = new XSCommand(CMD_INDEX_SYNONYMS, CMD_INDEX_SYNONYMS_DEL, 0, $raw, $synonym);
      if ($this->_bufSize > 0) {
          $this->appendBuffer(strval($cmd));
      } else {
          $this->execCommand($cmd, CMD_OK_RQST_FINISHED);
      }

      return $this;
  }
  /**
   * (Re)start command buffering with the given threshold.
   *
   * Anything still waiting in the buffer is sent first.
   *
   * @param int $size threshold in megabytes (shifted left by 20 bits);
   *        0 turns buffering off
   * @return XSIndex this object, for chaining
   */
  public function openBuffer($size = 4)
  {
      $pending = $this->_buf;
      if ($pending !== '') {
          $this->addExdata($pending, false);
      }
      $this->_bufSize = intval($size) << 20;
      $this->_buf = '';

      return $this;
  }
  /**
   * Flush and disable buffering; same as openBuffer(0).
   *
   * @return XSIndex this object, for chaining
   */
  public function closeBuffer()
  {
      $disabled = 0;

      return $this->openBuffer($disabled);
  }
  /**
   * Send CMD_INDEX_REBUILD with arg1 = 0 and mark a rebuild as in progress.
   *
   * @return XSIndex this object, for chaining
   */
  public function beginRebuild()
  {
      $begin = array('cmd' => CMD_INDEX_REBUILD, 'arg1' => 0);
      $this->execCommand($begin, CMD_OK_DB_REBUILD);
      $this->_rebuild = true;

      return $this;
  }
  /**
   * Send CMD_INDEX_REBUILD with arg1 = 1 if a rebuild is in progress.
   *
   * The in-progress marker is cleared before the command is sent.
   *
   * @return XSIndex this object, for chaining
   */
  public function endRebuild()
  {
      if ($this->_rebuild !== true) {
          return $this;
      }

      $this->_rebuild = false;
      $end = array('cmd' => CMD_INDEX_REBUILD, 'arg1' => 1);
      $this->execCommand($end, CMD_OK_DB_REBUILD);

      return $this;
  }
  /**
   * Select the index database by name (CMD_INDEX_SET_DB).
   *
   * @param string $name database name
   * @return XSIndex this object, for chaining
   */
  public function setDb($name)
  {
      $select = array('cmd' => CMD_INDEX_SET_DB, 'buf' => $name);
      $this->execCommand($select, CMD_OK_DB_CHANGED);

      return $this;
  }
  /**
   * Ask the server to flush the search log (CMD_FLUSH_LOGGING).
   *
   * @return bool true on success, false when the server reports CMD_ERR_BUSY
   * @throws XSException for any other error
   */
  public function flushLogging()
  {
      try {
          $this->execCommand(CMD_FLUSH_LOGGING, CMD_OK_LOG_FLUSHED);

          return true;
      } catch (XSException $e) {
          if ($e->getCode() !== CMD_ERR_BUSY) {
              throw $e;
          }

          return false;
      }
  }
  /**
   * Ask the server to commit the index (CMD_INDEX_COMMIT).
   *
   * @return bool true on success, false when the server reports
   *         CMD_ERR_BUSY or CMD_ERR_RUNNING
   * @throws XSException for any other error
   */
  public function flushIndex()
  {
      try {
          $this->execCommand(CMD_INDEX_COMMIT, CMD_OK_DB_COMMITED);

          return true;
      } catch (XSException $e) {
          $code = $e->getCode();
          if ($code !== CMD_ERR_BUSY && $code !== CMD_ERR_RUNNING) {
              throw $e;
          }

          return false;
      }
  }
  /**
   * Flush and disable the buffer, then delegate to the parent close().
   *
   * @param bool $ioerr passed through unchanged to the parent implementation
   */
  public function close($ioerr = false)
  {
      // Pending buffered commands must go out before the parent closes.
      $this->closeBuffer();

      parent::close($ioerr);
  }
  /**
   * Append serialized commands to the buffer and send the buffer once it
   * has reached the configured size.
   *
   * @param string $buf serialized command data
   */
  private function appendBuffer($buf)
  {
      $this->_buf .= $buf;
      if (strlen($this->_buf) < $this->_bufSize) {
          return;
      }

      $this->addExdata($this->_buf, false);
      $this->_buf = '';
  }
  /**
   * Finish a rebuild that is still in progress (errors are deliberately
   * ignored here), then run the parent destructor.
   */
  public function __destruct()
  {
      if ($this->_rebuild === true) {
          try {
              $this->endRebuild();
          } catch (Exception $ignored) {
              // best effort only: nothing to do about a failure at this point
          }
      }

      parent::__destruct();
  }
  1266. }
  1267. class XSSearch extends XSServer
  1268. {
  /** Page size used by search() when no limit has been set. */
  const PAGE_SIZE = 10;

  /** Name of the search-log database. */
  const LOG_DB = 'log_db';

  /** Default operator passed along with query commands. */
  private $_defaultOp = CMD_QUERY_OP_AND;

  /** Field prefixes already registered on the server (name => true). */
  private $_prefix;

  /** Whether per-field cut/numeric settings have been sent. */
  private $_fieldSet;

  /** Set by markResetScheme(); consumed by clearQuery(). */
  private $_resetScheme = false;

  /** Query string given to setQuery(). */
  private $_query;

  /** Cached term list of the current query. */
  private $_terms;

  /** Cached match count of the current query. */
  private $_count;

  /** Match count reported by the most recent search(). */
  private $_lastCount;

  /** Raw query, later replaced by the prepared highlight rules. */
  private $_highlight;

  /** Name of the current database. */
  private $_curDb;

  /** Names of databases added on top of the current one. */
  private $_curDbs = array();

  /** Database name that was current before the last setDb(). */
  private $_lastDb;

  /** Added database names that were current before the last setDb(). */
  private $_lastDbs = array();

  /** Facet results of the most recent search(). */
  private $_facets = array();

  /** Result limit for the next search(); 0 means PAGE_SIZE. */
  private $_limit = 0;

  /** Result offset for the next search(). */
  private $_offset = 0;

  /** Character set used for queries and results. */
  private $_charset = 'UTF-8';
  /**
   * Open the connection through the parent and reset per-connection state.
   *
   * @param mixed $conn connection argument, passed unchanged to the parent
   */
  public function open($conn)
  {
      parent::open($conn);

      // Nothing has been registered or counted on a fresh connection.
      $this->_lastCount = false;
      $this->_fieldSet = false;
      $this->_prefix = array();
  }
  /**
   * Set the character set (stored upper-cased; 'UTF8' becomes 'UTF-8').
   *
   * @param string $charset character set name
   * @return XSSearch this object, for chaining
   */
  public function setCharset($charset)
  {
      $normalized = strtoupper($charset);
      $this->_charset = ($normalized == 'UTF8') ? 'UTF-8' : $normalized;

      return $this;
  }
  /**
   * Switch the default query operator: OR when $value is exactly true,
   * AND otherwise.
   *
   * @param bool $value true to enable fuzzy (OR) matching
   * @return XSSearch this object, for chaining
   */
  public function setFuzzy($value = true)
  {
      if ($value === true) {
          $this->_defaultOp = CMD_QUERY_OP_OR;
      } else {
          $this->_defaultOp = CMD_QUERY_OP_AND;
      }

      return $this;
  }
  /**
   * Send the query parse flags, adding the automatic multi-word synonym
   * flag when $value is exactly true.
   *
   * @param bool $value true to enable automatic synonyms
   * @return XSSearch this object, for chaining
   */
  public function setAutoSynonyms($value = true)
  {
      $synonymFlag = ($value === true) ? CMD_PARSE_FLAG_AUTO_MULTIWORD_SYNONYMS : 0;
      $flag = CMD_PARSE_FLAG_BOOLEAN | CMD_PARSE_FLAG_PHRASE | CMD_PARSE_FLAG_LOVEHATE | $synonymFlag;

      $this->execCommand(array('cmd' => CMD_QUERY_PARSEFLAG, 'arg' => $flag));

      return $this;
  }
  /**
   * Fetch the synonym table from the server.
   *
   * The reply is parsed as newline-separated records whose tab-separated
   * first column is the word and whose remaining columns are its synonyms.
   *
   * @param int $limit number of records; 0 or less sends no paging data
   * @param int $offset first record, used only when $limit > 0
   * @param bool $stemmed sent to the server as arg1 (1 or 0)
   * @return array word => list of synonyms
   */
  public function getAllSynonyms($limit = 0, $offset = 0, $stemmed = false)
  {
      $page = '';
      if ($limit > 0) {
          $page = pack('II', intval($offset), intval($limit));
      }

      $cmd = array('cmd' => CMD_SEARCH_GET_SYNONYMS, 'buf1' => $page);
      $cmd['arg1'] = ($stemmed == true) ? 1 : 0;
      $res = $this->execCommand($cmd, CMD_OK_RESULT_SYNONYMS);

      $ret = array();
      if (empty($res->buf)) {
          return $ret;
      }
      foreach (explode("\n", $res->buf) as $line) {
          $columns = explode("\t", $line);
          $word = array_shift($columns);
          $ret[$word] = $columns;
      }

      return $ret;
  }
  /**
   * Ask the server for the string form of a query.
   *
   * @param string|null $query query text; null sends an empty string
   * @return string the server's reply, converted from UTF-8 to the
   *         configured charset
   */
  public function getQuery($query = null)
  {
      if ($query === null) {
          $query = '';
      } else {
          $query = $this->preQueryString($query);
      }

      $cmd = new XSCommand(CMD_QUERY_GET_STRING, 0, $this->_defaultOp, $query);
      $res = $this->execCommand($cmd, CMD_OK_QUERY_STRING);

      return XS::convert($res->buf, $this->_charset, 'UTF-8');
  }
  /**
   * Replace the current query.
   *
   * The previous query is always cleared; a null $query leaves it cleared.
   *
   * @param string|null $query new query text
   * @return XSSearch this object, for chaining
   */
  public function setQuery($query)
  {
      $this->clearQuery();
      if ($query === null) {
          return $this;
      }

      $this->_query = $query;
      $this->addQueryString($query);

      return $this;
  }
  /**
   * Sort results by several fields.
   *
   * Each array entry is either `name => bool` (true = ascending) or a plain
   * field name (descending). Fields resolving to the mixed vno are skipped.
   * A non-array argument is handed to setSort().
   *
   * @param array|string $fields sort specification
   * @return XSSearch this object, for chaining
   */
  public function setMultiSort($fields)
  {
      if (!is_array($fields)) {
          return $this->setSort($fields);
      }

      $buf = '';
      foreach ($fields as $key => $value) {
          if (is_bool($value)) {
              $name = $key;
              $asc = $value;
          } else {
              $name = $value;
              $asc = false;
          }
          $vno = $this->xs->getField($name, true)->vno;
          if ($vno != XSFieldScheme::MIXED_VNO) {
              $buf .= chr($vno) . chr($asc ? 1 : 0);
          }
      }

      // Nothing sortable: leave the server-side sort untouched.
      if ($buf !== '') {
          $this->execCommand(new XSCommand(CMD_SEARCH_SET_SORT, CMD_SORT_TYPE_MULTI, 0, $buf));
      }

      return $this;
  }
  /**
   * Sort results by one field, or by relevance when $field is null.
   *
   * An array argument is handed to setMultiSort().
   *
   * @param string|array|null $field field name, list of fields, or null
   * @param bool $asc true for ascending order
   * @return XSSearch this object, for chaining
   */
  public function setSort($field, $asc = false)
  {
      if (is_array($field)) {
          return $this->setMultiSort($field);
      }

      if ($field !== null) {
          $type = CMD_SORT_TYPE_VALUE | ($asc ? CMD_SORT_FLAG_ASCENDING : 0);
          $vno = $this->xs->getField($field, true)->vno;
          $cmd = new XSCommand(CMD_SEARCH_SET_SORT, $type, $vno);
      } else {
          $cmd = new XSCommand(CMD_SEARCH_SET_SORT, CMD_SORT_TYPE_RELEVANCE);
      }
      $this->execCommand($cmd);

      return $this;
  }
  /**
   * Collapse results on a field value.
   *
   * @param string|null $field field name; null sends the mixed vno
   * @param int $num value sent as the command's first argument, capped at 255
   * @return XSSearch this object, for chaining
   */
  public function setCollapse($field, $num = 1)
  {
      if ($field === null) {
          $vno = XSFieldScheme::MIXED_VNO;
      } else {
          $vno = $this->xs->getField($field, true)->vno;
      }
      $max = min(255, intval($num));

      $this->execCommand(new XSCommand(CMD_SEARCH_SET_COLLAPSE, $max, $vno));

      return $this;
  }
  /**
   * Filter results by a value range on a field.
   *
   * A null bound is open: only $to gives "less or equal", only $from gives
   * "greater or equal", both give a closed range. Two nulls do nothing.
   *
   * @param string $field field name
   * @param string|null $from lower bound
   * @param string|null $to upper bound
   * @return XSSearch this object, for chaining
   * @throws XSException when a bound is longer than 255 bytes
   */
  public function addRange($field, $from, $to)
  {
      if ($from === null && $to === null) {
          return $this;
      }
      if (strlen($from) > 255 || strlen($to) > 255) {
          throw new XSException('Value of range is too long');
      }

      $vno = $this->xs->getField($field)->vno;
      $from = XS::convert($from, 'UTF-8', $this->_charset);
      $to = XS::convert($to, 'UTF-8', $this->_charset);

      if ($from === null) {
          $cmd = new XSCommand(CMD_QUERY_VALCMP, CMD_QUERY_OP_FILTER, $vno, $to, chr(CMD_VALCMP_LE));
      } elseif ($to === null) {
          $cmd = new XSCommand(CMD_QUERY_VALCMP, CMD_QUERY_OP_FILTER, $vno, $from, chr(CMD_VALCMP_GE));
      } else {
          $cmd = new XSCommand(CMD_QUERY_RANGE, CMD_QUERY_OP_FILTER, $vno, $from, $to);
      }
      $this->execCommand($cmd);

      return $this;
  }
  /**
   * Add a term with the AND_MAYBE operator; shorthand for addQueryTerm().
   *
   * @param string|null $field field name
   * @param string $term the term
   * @param float $weight scale passed on to addQueryTerm()
   * @return XSSearch this object, for chaining
   */
  public function addWeight($field, $term, $weight = 1)
  {
      $op = CMD_QUERY_OP_AND_MAYBE;

      return $this->addQueryTerm($field, $term, $op, $weight);
  }
  /**
   * Request facet counts for one or more string-typed fields.
   *
   * @param string|array $field a field name or a list of names
   * @param bool $exact sent as arg1 = 1 only when exactly true
   * @return XSSearch this object, for chaining
   * @throws XSException when a field is not of TYPE_STRING
   */
  public function setFacets($field, $exact = false)
  {
      $names = is_array($field) ? $field : array($field);

      // One byte (the field's vno) per requested field.
      $buf = '';
      foreach ($names as $name) {
          $ff = $this->xs->getField($name);
          if ($ff->type !== XSFieldMeta::TYPE_STRING) {
              throw new XSException("Field `$name' cann't be used for facets search, can only be string type");
          }
          $buf .= chr($ff->vno);
      }

      $cmd = array('cmd' => CMD_SEARCH_SET_FACETS, 'buf' => $buf);
      $cmd['arg1'] = ($exact === true) ? 1 : 0;
      $this->execCommand($cmd);

      return $this;
  }
  /**
   * Return the facet data collected by the last search().
   *
   * @param string|null $field field name, or null for all fields
   * @return array all facets, the facets of $field, or an empty array when
   *         nothing is stored for $field
   */
  public function getFacets($field = null)
  {
      if ($field === null) {
          return $this->_facets;
      }
      if (!isset($this->_facets[$field])) {
          return array();
      }

      return $this->_facets[$field];
  }
  /**
   * Set the paging window used by the next search().
   *
   * @param int $limit number of results
   * @param int $offset first result
   * @return XSSearch this object, for chaining
   */
  public function setLimit($limit, $offset = 0)
  {
      $this->_offset = intval($offset);
      $this->_limit = intval($limit);

      return $this;
  }
  /**
   * Select the search database, remembering the previous selection so that
   * restoreDb() can bring it back.
   *
   * @param string $name database name
   * @return XSSearch this object, for chaining
   */
  public function setDb($name)
  {
      $name = strval($name);
      $this->execCommand(array('cmd' => CMD_SEARCH_SET_DB, 'buf' => $name));

      // Shift "current" into "last" only after the server accepted the change.
      $this->_lastDb = $this->_curDb;
      $this->_lastDbs = $this->_curDbs;
      $this->_curDb = $name;
      $this->_curDbs = array();

      return $this;
  }
  /**
   * Add another database to the current search scope.
   *
   * @param string $name database name
   * @return XSSearch this object, for chaining
   */
  public function addDb($name)
  {
      $name = strval($name);
      $add = array('cmd' => CMD_SEARCH_ADD_DB, 'buf' => $name);
      $this->execCommand($add);
      $this->_curDbs[] = $name;

      return $this;
  }
  /**
   * Flag that the scheme must be reset; clearQuery() acts on the flag.
   */
  public function markResetScheme()
  {
      $this->_resetScheme = true;
  }
  /**
   * Return the terms of a query.
   *
   * Empty items and items containing ':' in the server reply are dropped.
   * The result for the current query (empty $query) is cached.
   *
   * @param string|null $query query text; null means the current query
   * @param bool $convert whether to convert the terms from UTF-8 to the
   *        configured charset
   * @return array list of terms
   */
  public function terms($query = null, $convert = true)
  {
      $query = ($query === null) ? '' : $this->preQueryString($query);

      if ($query === '' && $this->_terms !== null) {
          $ret = $this->_terms;
      } else {
          $cmd = new XSCommand(CMD_QUERY_GET_TERMS, 0, $this->_defaultOp, $query);
          $res = $this->execCommand($cmd, CMD_OK_QUERY_TERMS);

          $ret = array();
          foreach (explode(' ', $res->buf) as $item) {
              if ($item !== '' && strpos($item, ':') === false) {
                  $ret[] = $item;
              }
          }
          if ($query === '') {
              $this->_terms = $ret;
          }
      }

      if (!$convert) {
          return $ret;
      }

      return XS::convert($ret, $this->_charset, 'UTF-8');
  }
  /**
   * Return the number of matches for a query.
   *
   * The count of the current query (empty $query) is cached.
   *
   * @param string|null $query query text; null means the current query
   * @return int match count reported by the server
   */
  public function count($query = null)
  {
      $query = ($query === null) ? '' : $this->preQueryString($query);
      $isCurrent = ($query === '');
      if ($isCurrent && $this->_count !== null) {
          return $this->_count;
      }

      $cmd = new XSCommand(CMD_SEARCH_GET_TOTAL, 0, $this->_defaultOp, $query);
      $res = $this->execCommand($cmd, CMD_OK_SEARCH_TOTAL);
      $data = unpack('Icount', $res->buf);
      if ($isCurrent) {
          $this->_count = $data['count'];
      }

      return $data['count'];
  }
  /**
   * Run a search and return the matching documents.
   *
   * Responses are read until the "result end" marker: facet packets fill
   * the facet table, document packets start a new XSDocument, and field
   * packets set a field on the most recent document. After a search of the
   * current query (empty $query) the count is cached, the query is logged
   * and the highlighter is prepared. The paging window is reset afterwards.
   *
   * @param string|null $query query text; null means the current query
   * @return XSDocument[] matching documents
   * @throws XSException on an unexpected response packet
   */
  public function search($query = null)
  {
      // Keep the raw query for highlighting, except while on the log db.
      if ($this->_curDb !== self::LOG_DB) {
          $this->_highlight = $query;
      }
      $query = ($query === null) ? '' : $this->preQueryString($query);

      $size = ($this->_limit > 0) ? $this->_limit : self::PAGE_SIZE;
      $page = pack('II', $this->_offset, $size);
      $cmd = new XSCommand(CMD_SEARCH_GET_RESULT, 0, $this->_defaultOp, $query, $page);
      $res = $this->execCommand($cmd, CMD_OK_RESULT_BEGIN);
      $head = unpack('Icount', $res->buf);
      $this->_lastCount = $head['count'];

      $this->_facets = array();
      $ret = array();
      $vnoes = $this->xs->getScheme()->getVnoMap();
      for (;;) {
          $res = $this->getRespond();
          if ($res->cmd == CMD_SEARCH_RESULT_FACETS) {
              // Records: 1-byte vno, 1-byte value length, 4-byte count, value.
              $total = strlen($res->buf);
              for ($off = 0; ($off + 6) < $total; $off += $item['vlen'] + 6) {
                  $item = unpack('Cvno/Cvlen/Inum', substr($res->buf, $off, 6));
                  if (!isset($vnoes[$item['vno']])) {
                      continue;
                  }
                  $name = $vnoes[$item['vno']];
                  $value = substr($res->buf, $off + 6, $item['vlen']);
                  if (!isset($this->_facets[$name])) {
                      $this->_facets[$name] = array();
                  }
                  $this->_facets[$name][$value] = $item['num'];
              }
          } elseif ($res->cmd == CMD_SEARCH_RESULT_DOC) {
              $doc = new XSDocument($res->buf, $this->_charset);
              $ret[] = $doc;
          } elseif ($res->cmd == CMD_SEARCH_RESULT_FIELD) {
              // Field packets belong to the latest document, if any.
              if (isset($doc)) {
                  $name = isset($vnoes[$res->arg]) ? $vnoes[$res->arg] : $res->arg;
                  $doc->setField($name, $res->buf);
              }
          } elseif ($res->cmd == CMD_OK && $res->arg == CMD_OK_RESULT_END) {
              break;
          } else {
              $msg = 'Unexpected respond in search {CMD:' . $res->cmd . ', ARG:' . $res->arg . '}';
              throw new XSException($msg);
          }
      }

      if ($query === '') {
          $this->_count = $this->_lastCount;
          $this->logQuery();
          $this->initHighlight();
      }
      $this->_limit = 0;
      $this->_offset = 0;

      return $ret;
  }
  /**
   * Return the match count recorded by the most recent search().
   *
   * @return int|false the count; false after open() until a search has run
   */
  public function getLastCount()
  {
      $count = $this->_lastCount;

      return $count;
  }
  /**
   * Ask the server for the database total (CMD_SEARCH_DB_TOTAL).
   *
   * @return int the total unpacked from the reply
   */
  public function getDbTotal()
  {
      $res = $this->execCommand(new XSCommand(CMD_SEARCH_DB_TOTAL), CMD_OK_DB_TOTAL);
      $data = unpack('Itotal', $res->buf);

      return $data['total'];
  }
  /**
   * Return popular queries from the search-log database.
   *
   * The logger scheme and the log database are selected for the duration
   * of the call and restored before returning. A CMD_ERR_XAPIAN failure is
   * swallowed and yields whatever was collected so far.
   *
   * Unlike the previous implementation, the database is also restored and
   * the paging window cleared when search() fails after the switch to the
   * log database; otherwise later searches would keep running against it.
   *
   * @param int $limit number of entries, clamped to 1..50
   * @param string $type 'total', 'lastnum' or 'currnum'; anything else is
   *        treated as 'total'
   * @return array query text => value of the $type field
   * @throws XSException for errors other than CMD_ERR_XAPIAN
   */
  public function getHotQuery($limit = 6, $type = 'total')
  {
      $ret = array();
      $limit = max(1, min(50, intval($limit)));
      if ($type !== 'lastnum' && $type !== 'currnum') {
          $type = 'total';
      }

      $this->xs->setScheme(XSFieldScheme::logger());
      // Set only once setDb() succeeded, so a failed switch is never "restored".
      $switched = false;
      try {
          $this->setDb(self::LOG_DB);
          $switched = true;
          $this->setLimit($limit);
          $result = $this->search($type . ':1');
          foreach ($result as $doc) /* @var $doc XSDocument */ {
              $body = $doc->body;
              $ret[$body] = $doc->f($type);
          }
      } catch (XSException $e) {
          if ($e->getCode() != CMD_ERR_XAPIAN) {
              throw $e;
          }
          // search() clears the paging window only on success.
          $this->setLimit(0);
      }

      if ($switched) {
          try {
              $this->restoreDb();
          } catch (XSException $e) {
              // Same tolerance as before, when the restore ran inside the try.
              if ($e->getCode() != CMD_ERR_XAPIAN) {
                  throw $e;
              }
          }
      }
      $this->xs->restoreScheme();

      return $ret;
  }
  /**
   * Return logged queries related to a query.
   *
   * Runs a fuzzy search on the log database using the logger scheme, then
   * restores the database, the scheme and the default operator. Entries
   * equal to the query itself (case-insensitive) are skipped. A
   * CMD_ERR_XAPIAN failure is swallowed.
   *
   * @param string|null $query query text; null uses the current query with
   *        field prefixes cleaned out
   * @param int $limit number of entries, clamped to 1..20
   * @return array list of related query strings; empty when the query is
   *         empty or still contains ':'
   * @throws XSException for errors other than CMD_ERR_XAPIAN
   */
  public function getRelatedQuery($query = null, $limit = 6)
  {
      $ret = array();
      $limit = max(1, min(20, intval($limit)));

      if ($query === null) {
          $query = $this->cleanFieldQuery($this->_query);
      }
      if (empty($query) || strpos($query, ':') !== false) {
          return $ret;
      }

      // setFuzzy() below changes the default operator; remember it.
      $savedOp = $this->_defaultOp;
      $this->xs->setScheme(XSFieldScheme::logger());
      try {
          // One extra row, because the query itself may be among the results.
          $result = $this->setDb(self::LOG_DB)->setFuzzy()->setLimit($limit + 1)->search($query);
          foreach ($result as $doc) /* @var $doc XSDocument */ {
              $doc->setCharset($this->_charset);
              $body = $doc->body;
              if (strcasecmp($body, $query) == 0) {
                  continue;
              }
              $ret[] = $body;
              if (count($ret) == $limit) {
                  break;
              }
          }
      } catch (XSException $e) {
          if ($e->getCode() != CMD_ERR_XAPIAN) {
              throw $e;
          }
      }

      $this->restoreDb();
      $this->xs->restoreScheme();
      $this->_defaultOp = $savedOp;

      return $ret;
  }
  /**
   * Return expansions of a query as reported by the server.
   *
   * A CMD_ERR_XAPIAN failure is swallowed and yields what was collected.
   *
   * @param string $query query text
   * @param int $limit number of entries, clamped to 1..20
   * @return array list of strings in the configured charset
   * @throws XSException on an unexpected response or any other error
   */
  public function getExpandedQuery($query, $limit = 10)
  {
      $ret = array();
      $limit = max(1, min(20, intval($limit)));

      try {
          $cmd = array(
              'cmd' => CMD_QUERY_GET_EXPANDED,
              'arg1' => $limit,
              'buf' => XS::convert($query, 'UTF-8', $this->_charset),
          );
          $this->execCommand($cmd, CMD_OK_RESULT_BEGIN);

          // Field packets carry one entry each, until the end marker.
          for (;;) {
              $res = $this->getRespond();
              if ($res->cmd == CMD_SEARCH_RESULT_FIELD) {
                  $ret[] = XS::convert($res->buf, $this->_charset, 'UTF-8');
                  continue;
              }
              if ($res->cmd == CMD_OK && $res->arg == CMD_OK_RESULT_END) {
                  break;
              }
              $msg = 'Unexpected respond in search {CMD:' . $res->cmd . ', ARG:' . $res->arg . '}';
              throw new XSException($msg);
          }
      } catch (XSException $e) {
          if ($e->getCode() != CMD_ERR_XAPIAN) {
              throw $e;
          }
      }

      return $ret;
  }
  /**
   * Return spelling corrections for a query.
   *
   * With a null $query the current query is used, but only when its cached
   * count is zero/unset or not above 0.1% (rounded up) of the database
   * total. A CMD_ERR_XAPIAN failure is swallowed.
   *
   * @param string|null $query query text, or null for the current query
   * @return array list of corrected strings; empty when there is nothing
   *         to correct or the query is empty / still contains ':'
   * @throws XSException for errors other than CMD_ERR_XAPIAN
   */
  public function getCorrectedQuery($query = null)
  {
      $ret = array();

      try {
          if ($query === null) {
              // Enough matches already: no correction wanted.
              if ($this->_count > 0 && $this->_count > ceil($this->getDbTotal() * 0.001)) {
                  return $ret;
              }
              $query = $this->cleanFieldQuery($this->_query);
          }
          if (empty($query) || strpos($query, ':') !== false) {
              return $ret;
          }

          $cmd = array(
              'cmd' => CMD_QUERY_GET_CORRECTED,
              'buf' => XS::convert($query, 'UTF-8', $this->_charset),
          );
          $res = $this->execCommand($cmd, CMD_OK_QUERY_CORRECTED);
          if ($res->buf !== '') {
              $ret = explode("\n", XS::convert($res->buf, $this->_charset, 'UTF-8'));
          }
      } catch (XSException $e) {
          if ($e->getCode() != CMD_ERR_XAPIAN) {
              throw $e;
          }
      }

      return $ret;
  }
  /**
   * Record a query in the search log (CMD_SEARCH_ADD_LOG).
   *
   * @param string $query text to log
   * @param int $wdf sent packed in buf1 only when greater than 1
   */
  public function addSearchLog($query, $wdf = 1)
  {
      $cmd = array('cmd' => CMD_SEARCH_ADD_LOG, 'buf' => $query);
      if ($wdf > 1) {
          $cmd['buf1'] = pack('i', $wdf);
      }

      $this->execCommand($cmd, CMD_OK_LOGGED);
  }
  /**
   * Wrap the terms of the last query in <em> tags inside a text.
   *
   * The highlight rules are built on first use. Regex rules are applied
   * before plain string replacements.
   *
   * @param string $value text to highlight
   * @return string highlighted text; an empty value is returned unchanged
   */
  public function highlight($value)
  {
      if (empty($value)) {
          return $value;
      }
      if (!is_array($this->_highlight)) {
          $this->initHighlight();
      }

      $rules = $this->_highlight;
      if (isset($rules['pattern'])) {
          $value = preg_replace($rules['pattern'], $rules['replace'], $value);
      }
      if (isset($rules['pairs'])) {
          $value = str_replace(array_keys($rules['pairs']), array_values($rules['pairs']), $value);
      }

      return $value;
  }
  /**
   * Build a compact log entry from a query's terms and record it.
   *
   * With no explicit query the current query is used, and nothing is logged
   * when the last search found nothing, when the default operator is OR and
   * the query has a space past its first character, or when ' OR ',
   * ' NOT ' or ' XOR ' occurs past its first character.
   *
   * Terms are located in the query text in order: a term starting exactly
   * where the previous one ended is appended directly, an overlapping one
   * contributes its bytes after the first three, and a later one is joined
   * with a space. Joining stops after three joins or once the entry exceeds
   * 42 bytes. Entries shorter than 2 bytes, or 3 bytes long starting with a
   * byte above 0x80, are discarded.
   *
   * @param string|null $query query to log, or null/'' for the current one
   */
  private function logQuery($query = null)
  {
      if ($query !== '' && $query !== null) {
          $terms = $this->terms($query, false);
      } else {
          $query = $this->_query;
          if (!$this->_lastCount) {
              return;
          }
          if ($this->_defaultOp == CMD_QUERY_OP_OR && strpos($query, ' ')) {
              return;
          }
          if (strpos($query, ' OR ') || strpos($query, ' NOT ') || strpos($query, ' XOR ')) {
              return;
          }
          $terms = $this->terms(null, false);
      }

      $log = '';
      $pos = 0;   // end offset of the previous term inside $query
      $max = 0;   // number of space-joined terms so far
      foreach ($terms as $term) {
          // A 6-byte term may overlap the previous one by 3 bytes.
          $from = ($pos > 3 && strlen($term) === 6) ? $pos - 3 : $pos;
          $found = strpos($query, $term, $from);
          if ($found === false) {
              continue;
          }

          if ($found === $pos) {
              $log .= $term;
          } elseif ($found < $pos) {
              $log .= substr($term, 3);
          } else {
              if (++$max > 3 || strlen($log) > 42) {
                  break;
              }
              $log .= ' ' . $term;
          }
          $pos = $found + strlen($term);
      }

      $log = trim($log);
      $length = strlen($log);
      if ($length < 2 || ($length == 3 && ord($log[0]) > 0x80)) {
          return;
      }

      $this->addSearchLog($log);
  }
  /**
   * Reset the query on the server (CMD_QUERY_INIT) and drop cached state.
   *
   * When a scheme reset was requested, arg1 = 1 is sent and the registered
   * prefixes and the field-settings marker are forgotten as well.
   */
  private function clearQuery()
  {
      $cmd = new XSCommand(CMD_QUERY_INIT);
      if ($this->_resetScheme === true) {
          $cmd->arg1 = 1;
          $this->_resetScheme = false;
          $this->_fieldSet = false;
          $this->_prefix = array();
      }
      $this->execCommand($cmd);

      $this->_query = null;
      $this->_count = null;
      $this->_terms = null;
  }
  /**
   * Parse a query string on the server and combine it with the current
   * query.
   *
   * @param string $query query text
   * @param int $addOp operator used to combine with the existing query
   * @param float $scale sent as a 16-bit big-endian integer of scale * 100
   *        when positive and different from 1
   * @return string the prepared query string that was sent
   */
  private function addQueryString($query, $addOp = CMD_QUERY_OP_AND, $scale = 1)
  {
      $query = $this->preQueryString($query);

      $bscale = '';
      if ($scale > 0 && $scale != 1) {
          $bscale = pack('n', intval($scale * 100));
      }

      $this->execCommand(new XSCommand(CMD_QUERY_PARSE, $addOp, $this->_defaultOp, $query, $bscale));

      return $query;
  }
  /**
   * Add a single term to the current query.
   *
   * The term is lower-cased and converted from the configured charset to
   * UTF-8 before it is sent.
   *
   * @param string|null $field field name; null sends the mixed vno
   * @param string $term the term
   * @param int $addOp operator used to combine with the existing query
   * @param float $scale sent as a 16-bit big-endian integer of scale * 100
   *        when positive and different from 1
   * @return XSSearch this object, for chaining
   */
  public function addQueryTerm($field, $term, $addOp = CMD_QUERY_OP_AND, $scale = 1)
  {
      $term = XS::convert(strtolower($term), 'UTF-8', $this->_charset);

      $bscale = '';
      if ($scale > 0 && $scale != 1) {
          $bscale = pack('n', intval($scale * 100));
      }

      if ($field === null) {
          $vno = XSFieldScheme::MIXED_VNO;
      } else {
          $vno = $this->xs->getField($field, true)->vno;
      }

      $this->execCommand(new XSCommand(CMD_QUERY_TERM, $addOp, $vno, $term, $bscale));

      return $this;
  }
  /**
   * Re-select the database (and added databases) that were current before
   * the last setDb() call.
   */
  private function restoreDb()
  {
      // Copy first: setDb() overwrites the "last" fields.
      $previous = $this->_lastDb;
      $extras = $this->_lastDbs;

      $this->setDb($previous);
      foreach ($extras as $extra) {
          $this->addDb($extra);
      }
  }
  /**
   * Prepare a user query string before it is sent to the server.
   *
   * The query is split on whitespace and each token is rewritten:
   * - `field:value` tokens whose field is known (and not the mixed field)
   *   get their prefix registered; multi-byte values of non-boolean fields
   *   are wrapped in parentheses, values of boolean fields are lower-cased
   *   or run through the field's custom tokenizer;
   * - tokens starting with '+' or '-' that contain multi-byte characters
   *   get their remainder wrapped in parentheses;
   * - everything else is kept as is.
   * On first use the per-field cut lengths and numeric flags are also sent.
   *
   * A one-character token such as a lone '+' or '-' used to be read past
   * its end via `$part[1]`, raising an "Uninitialized string offset"
   * notice/warning; substr() is used instead, as in the field branch.
   *
   * @param string $query raw query in the configured charset
   * @return string rewritten query, converted to UTF-8
   */
  private function preQueryString($query)
  {
      $query = trim($query);
      if ($this->_resetScheme === true) {
          $this->clearQuery();
      }

      $newQuery = '';
      $parts = preg_split('/[ \t\r\n]+/', $query);
      foreach ($parts as $part) {
          if ($part === '') {
              continue;
          }
          if ($newQuery != '') {
              $newQuery .= ' ';
          }

          // "field:value" form (the colon may not be the first character).
          if (($pos = strpos($part, ':', 1)) !== false) {
              // Skip leading operator characters to find the field name.
              for ($i = 0; $i < $pos; $i++) {
                  if (strpos('+-~(', $part[$i]) === false) {
                      break;
                  }
              }
              $name = substr($part, $i, $pos - $i);
              if (($field = $this->xs->getField($name, false)) !== false
                  && $field->vno != XSFieldScheme::MIXED_VNO
              ) {
                  $this->regQueryPrefix($name);
                  if (!$field->isBoolIndex() && substr($part, $pos + 1, 1) != '('
                      && preg_match('/[\x81-\xfe]/', $part)
                  ) {
                      // Multi-byte value: group it so it stays under the prefix.
                      $newQuery .= substr($part, 0, $pos + 1) . '(' . substr($part, $pos + 1) . ')';
                  } elseif ($field->isBoolIndex()) {
                      $value = substr($part, $pos + 1);
                      if (!$field->hasCustomTokenizer()) {
                          $newQuery .= substr($part, 0, $pos + 1) . strtolower($value);
                      } else {
                          $terms = array();
                          $tokens = $field->getCustomTokenizer()->getTokens($value);
                          foreach ($tokens as $term) {
                              $terms[] = strtolower($term);
                          }
                          $terms = array_unique($terms);
                          $newQuery .= $name . ':' . implode(' ' . $name . ':', $terms);
                      }
                  } else {
                      $newQuery .= $part;
                  }
                  continue;
              }
          }

          // "+word" / "-word" with multi-byte content: group the word.
          // substr() keeps a lone "+" or "-" from reading past the string.
          if (($part[0] == '+' || $part[0] == '-') && substr($part, 1, 1) != '('
              && preg_match('/[\x81-\xfe]/', $part)
          ) {
              $newQuery .= substr($part, 0, 1) . '(' . substr($part, 1) . ')';
              continue;
          }

          $newQuery .= $part;
      }

      // Send per-field settings once per connection / scheme reset.
      if ($this->_fieldSet !== true) {
          foreach ($this->xs->getAllFields() as $field) /* @var $field XSFieldMeta */ {
              if ($field->cutlen != 0) {
                  $len = min(127, ceil($field->cutlen / 10));
                  $cmd = new XSCommand(CMD_SEARCH_SET_CUT, $len, $field->vno);
                  $this->execCommand($cmd);
              }
              if ($field->isNumeric()) {
                  $cmd = new XSCommand(CMD_SEARCH_SET_NUMERIC, 0, $field->vno);
                  $this->execCommand($cmd);
              }
          }
          $this->_fieldSet = true;
      }

      return XS::convert($newQuery, 'UTF-8', $this->_charset);
  }
  /**
   * Register a field name as a query prefix on the server, once.
   *
   * Unknown fields and fields on the mixed vno are ignored.
   *
   * @param string $name field name used as prefix
   */
  private function regQueryPrefix($name)
  {
      if (isset($this->_prefix[$name])) {
          return;
      }
      $field = $this->xs->getField($name, false);
      if (!$field || $field->vno == XSFieldScheme::MIXED_VNO) {
          return;
      }

      $type = $field->isBoolIndex() ? CMD_PREFIX_BOOLEAN : CMD_PREFIX_NORMAL;
      $this->execCommand(new XSCommand(CMD_QUERY_PREFIX, $type, $field->vno, $name));
      $this->_prefix[$name] = true;
  }
  /**
   * Strip ' AND ' / ' OR ' operators and rewrite `field:value` tokens via
   * cleanFieldCallback().
   *
   * @param string $query query text
   * @return string cleaned query text
   */
  private function cleanFieldQuery($query)
  {
      $query = strtr($query, array(' AND ' => ' ', ' OR ' => ' '));
      if (strpos($query, ':') === false) {
          return $query;
      }

      $regex = '/(^|\s)([0-9A-Za-z_\.-]+):([^\s]+)/';

      return preg_replace_callback($regex, array($this, 'cleanFieldCallback'), $query);
  }
  /**
   * preg_replace_callback() handler used by cleanFieldQuery().
   *
   * @param array $match [0] whole match, [1] leading whitespace or '',
   *        [2] field name, [3] value
   * @return string the match unchanged for an unknown field, '' for a
   *         boolean-index field, otherwise the value without its prefix
   *         and without one pair of enclosing parentheses
   */
  private function cleanFieldCallback($match)
  {
      $field = $this->xs->getField($match[2], false);
      if ($field === false) {
          return $match[0];
      }
      if ($field->isBoolIndex()) {
          return '';
      }

      $value = $match[3];
      if (substr($value, 0, 1) == '(' && substr($value, -1, 1) == ')') {
          $value = substr($value, 1, -1);
      }

      return $match[1] . $value;
  }
  /**
   * Build the highlight rules from the terms of the remembered query.
   *
   * Runs of 6-byte terms that start with a byte >= 0xc0 and overlap their
   * predecessor by 3 bytes are expanded: besides each term, the merged
   * 9-byte strings of neighbouring terms are added. Terms containing ASCII
   * letters become case-insensitive regex rules; all others become plain
   * string replacements.
   *
   * The regex rules are built with preg_quote(). The previous code escaped
   * only '+' and '/', so a term containing another metacharacter ('.',
   * '(', '[', '*', '?', ...) produced a wrong or invalid pattern.
   */
  private function initHighlight()
  {
      $terms = array();
      $tmps = $this->terms($this->_highlight, false);
      $total = count($tmps);
      for ($i = 0; $i < $total; $i++) {
          // Ordinary term: take it as is.
          if (strlen($tmps[$i]) !== 6 || ord(substr($tmps[$i], 0, 1)) < 0xc0) {
              $terms[] = XS::convert($tmps[$i], $this->_charset, 'UTF-8');
              continue;
          }

          // Find the end ($j, exclusive) of the run of overlapping terms.
          for ($j = $i + 1; $j < $total; $j++) {
              if (strlen($tmps[$j]) !== 6 || substr($tmps[$j], 0, 3) !== substr($tmps[$j - 1], 3, 3)) {
                  break;
              }
          }

          if (($k = ($j - $i)) === 1) {
              $terms[] = XS::convert($tmps[$i], $this->_charset, 'UTF-8');
          } else {
              // Walk the run backwards, adding merged neighbours on odd $k.
              $i = $j - 1;
              while ($k--) {
                  $j--;
                  if ($k & 1) {
                      $terms[] = XS::convert(substr($tmps[$j - 1], 0, 3) . $tmps[$j], $this->_charset, 'UTF-8');
                  }
                  $terms[] = XS::convert($tmps[$j], $this->_charset, 'UTF-8');
              }
          }
      }

      $pattern = $replace = $pairs = array();
      foreach ($terms as $term) {
          if (!preg_match('/[a-zA-Z]/', $term)) {
              $pairs[$term] = '<em>' . $term . '</em>';
          } else {
              // Quote every regex metacharacter as well as the delimiter.
              $pattern[] = '/' . preg_quote($term, '/') . '/i';
              $replace[] = '<em>$0</em>';
          }
      }

      $this->_highlight = array();
      if (count($pairs) > 0) {
          $this->_highlight['pairs'] = $pairs;
      }
      if (count($pattern) > 0) {
          $this->_highlight['pattern'] = $pattern;
          $this->_highlight['replace'] = $replace;
      }
  }
  1957. }
  /**
   * One protocol command or response: a command code, two one-byte
   * arguments and two payload buffers.
   *
   * On the wire (see __toString) it is an 8-byte header
   * (cmd, arg1, arg2, length of buf1, length of buf) followed by buf
   * and then buf1.
   */
  class XSCommand extends XSComponent
  {
      public $cmd = CMD_NONE;
      public $arg1 = 0;
      public $arg2 = 0;
      public $buf = '';
      public $buf1 = '';

      /**
       * @param int|array $cmd  command code, or an array of property values
       *                        keyed by property name; the extra key 'arg'
       *                        is also accepted (presumably routed to
       *                        setArg() by the parent class)
       * @param int       $arg1 first argument (ignored when $cmd is an array)
       * @param int       $arg2 second argument (ignored when $cmd is an array)
       * @param string    $buf  main payload (ignored when $cmd is an array)
       * @param string    $buf1 secondary payload (ignored when $cmd is an array)
       */
      public function __construct($cmd, $arg1 = 0, $arg2 = 0, $buf = '', $buf1 = '')
      {
          if (!is_array($cmd))
          {
              $this->cmd = $cmd;
              $this->arg1 = $arg1;
              $this->arg2 = $arg2;
              $this->buf = $buf;
              $this->buf1 = $buf1;
              return;
          }

          foreach ($cmd as $name => $val)
          {
              // Unknown keys are silently ignored.
              if ($name !== 'arg' && !property_exists($this, $name))
                  continue;
              $this->$name = $val;
          }
      }

      /**
       * Serializes the command for transmission.
       *
       * buf1 is truncated (in place) to 255 bytes because its length is
       * stored in a single header byte.
       *
       * @return string binary packet
       */
      public function __toString()
      {
          if (strlen($this->buf1) > 0xff)
              $this->buf1 = substr($this->buf1, 0, 0xff);

          $header = pack('CCCCI', $this->cmd, $this->arg1, $this->arg2, strlen($this->buf1), strlen($this->buf));
          return $header . $this->buf . $this->buf1;
      }

      /**
       * @return int arg1 and arg2 combined, arg1 being the high byte
       */
      public function getArg()
      {
          return ($this->arg1 << 8) | $this->arg2;
      }

      /**
       * Splits a 16-bit value into arg1 (high byte) and arg2 (low byte).
       *
       * @param int $arg combined argument
       */
      public function setArg($arg)
      {
          $this->arg2 = $arg & 0xff;
          $this->arg1 = ($arg >> 8) & 0xff;
      }
  }
  /**
   * A connection to a server speaking the XSCommand protocol, or to a
   * local file that merely records the commands written to it.
   */
  class XSServer extends XSComponent
  {
      /** Flag bit: the target is a local file opened for writing. */
      const FILE = 0x01;
      /** Flag bit: the connection is closed or otherwise unusable. */
      const BROKEN = 0x02;

      public $xs;
      private $_sock, $_conn;
      private $_flag;
      private $_project;
      private $_sendBuffer;

      /**
       * @param mixed $conn connection string (see connect()); null defers opening
       * @param mixed $xs   owning XS object, if any
       */
      public function __construct($conn = null, $xs = null)
      {
          $this->xs = $xs;
          if ($conn !== null)
              $this->open($conn);
      }

      /**
       * Drops the XS reference and closes the connection.
       */
      public function __destruct()
      {
          $this->xs = null;
          $this->close();
      }

      /**
       * Closes any current connection and opens a new one.
       *
       * When $this->xs is an XS instance the project is selected right away.
       *
       * @param mixed $conn connection string (see connect())
       * @throws XSException when the connection cannot be established
       */
      public function open($conn)
      {
          $this->close();
          $this->_conn = $conn;
          // Stay flagged as broken until connect() has succeeded.
          $this->_flag = self::BROKEN;
          $this->_sendBuffer = '';
          $this->_project = null;
          $this->connect();
          $this->_flag ^= self::BROKEN;
          if ($this->xs instanceof XS)
              $this->setProject($this->xs->getName());
      }

      /**
       * Re-opens the connection when it is broken, or always when forced.
       *
       * @param bool $force reopen even if the connection looks healthy
       * @return XSServer $this
       */
      public function reopen($force = false)
      {
          if ($this->_flag & self::BROKEN || $force === true)
              $this->open($this->_conn);
          return $this;
      }

      /**
       * Closes the connection and marks it broken.
       *
       * Unless $ioerr is set, buffered commands are flushed first and, for
       * non-file connections, a CMD_QUIT is written.
       *
       * @param bool $ioerr true when closing because of an I/O failure
       */
      public function close($ioerr = false)
      {
          if ($this->_sock && !($this->_flag & self::BROKEN))
          {
              if (!$ioerr && $this->_sendBuffer !== '')
              {
                  $this->write($this->_sendBuffer);
                  $this->_sendBuffer = '';
              }
              if (!$ioerr && !($this->_flag & self::FILE))
              {
                  $cmd = new XSCommand(CMD_QUIT);
                  // Stringify explicitly, as sendCommand() does, instead of
                  // relying on implicit object-to-string coercion.
                  fwrite($this->_sock, strval($cmd));
              }
              fclose($this->_sock);
              $this->_flag |= self::BROKEN;
          }
      }

      /**
       * @return resource|null the underlying stream
       */
      public function getSocket()
      {
          return $this->_sock;
      }

      /**
       * @return string|null name of the currently selected project
       */
      public function getProject()
      {
          return $this->_project;
      }

      /**
       * Selects a project on the server; does nothing if already selected.
       *
       * @param string $name project name
       * @param string $home sent as the command's secondary buffer
       * @throws XSException on an error or unexpected response
       */
      public function setProject($name, $home = '')
      {
          if ($name !== $this->_project)
          {
              $cmd = array('cmd' => CMD_USE, 'buf' => $name, 'buf1' => $home);
              $this->execCommand($cmd, CMD_OK_PROJECT);
              $this->_project = $name;
          }
      }

      /**
       * Sends a CMD_TIMEOUT command carrying the given value.
       *
       * @param int $sec timeout value passed as the command argument
       * @throws XSException on an error or unexpected response
       */
      public function setTimeout($sec)
      {
          $cmd = array('cmd' => CMD_TIMEOUT, 'arg' => $sec);
          $this->execCommand($cmd, CMD_OK_TIMEOUT_SET);
      }

      /**
       * Sends a command and, where applicable, validates the response.
       *
       * Commands whose code has bit 0x80 set are only appended to the send
       * buffer. Any other command is written together with the buffered
       * data; for file targets no response is read.
       *
       * @param XSCommand|array|int $cmd     command, or XSCommand constructor input
       * @param int                 $res_arg expected response argument
       *                                     (CMD_NONE disables the check)
       * @param int                 $res_cmd expected response command code
       * @return XSCommand|bool the response, or true when none is read
       * @throws XSException on CMD_ERR (unless expected) or a mismatching response
       */
      public function execCommand($cmd, $res_arg = CMD_NONE, $res_cmd = CMD_OK)
      {
          if (!$cmd instanceof XSCommand)
              $cmd = new XSCommand($cmd);
          if ($cmd->cmd & 0x80)
          {
              $this->_sendBuffer .= $cmd;
              return true;
          }
          $buf = $this->_sendBuffer . $cmd;
          $this->_sendBuffer = '';
          $this->write($buf);
          if ($this->_flag & self::FILE)
              return true;
          $res = $this->getRespond();
          if ($res->cmd === CMD_ERR && $res_cmd != CMD_ERR)
              throw new XSException($res->buf, $res->arg);
          if ($res->cmd != $res_cmd || ($res_arg != CMD_NONE && $res->arg != $res_arg))
              throw new XSException('Unexpected respond {CMD:' . $res->cmd . ', ARG:' . $res->arg . '}');
          return $res;
      }

      /**
       * Writes a command immediately, bypassing the send buffer and without
       * reading a response.
       *
       * @param XSCommand|array|int $cmd command, or XSCommand constructor input
       * @throws XSException when the data cannot be written
       */
      public function sendCommand($cmd)
      {
          if (!$cmd instanceof XSCommand)
              $cmd = new XSCommand($cmd);
          $this->write(strval($cmd));
      }

      /**
       * Reads one response: an 8-byte header followed by both buffers.
       *
       * @return XSCommand the decoded response
       * @throws XSException when the data cannot be read completely
       */
      public function getRespond()
      {
          $buf = $this->read(8);
          $hdr = unpack('Ccmd/Carg1/Carg2/Cblen1/Iblen', $buf);
          $res = new XSCommand($hdr);
          $res->buf = $this->read($hdr['blen']);
          $res->buf1 = $this->read($hdr['blen1']);
          return $res;
      }

      /**
       * Non-blocking check whether the stream is readable.
       *
       * @return bool false for missing, broken or file connections
       */
      public function hasRespond()
      {
          if ($this->_sock === null || $this->_flag & (self::BROKEN | self::FILE))
              return false;
          $wfds = $xfds = array();
          $rfds = array($this->_sock);
          $res = stream_select($rfds, $wfds, $xfds, 0, 0);
          return $res > 0;
      }

      /**
       * Writes the buffer to the stream, retrying after partial writes.
       *
       * @param string $buf data to send
       * @param int    $len number of bytes to send; 0 means the whole buffer
       * @return bool true on success (including nothing to send)
       * @throws XSException when the connection is unusable or the write fails
       */
      private function write($buf, $len = 0)
      {
          $buf = strval($buf);
          if ($len == 0)
              $len = strlen($buf);
          if ($len == 0)
              return true;
          // Remember the requested total in every case so the error message
          // below is well defined even when $len was passed explicitly.
          $size = $len;
          $this->check();
          while (true)
          {
              $bytes = fwrite($this->_sock, $buf, $len);
              if ($bytes === false || $bytes === 0 || $bytes === $len)
                  break;
              $len -= $bytes;
              $buf = substr($buf, $bytes);
          }
          if ($bytes === false || $bytes === 0)
          {
              // Fetch the stream state before close() invalidates the handle.
              $meta = stream_get_meta_data($this->_sock);
              $this->close(true);
              $reason = $meta['timed_out'] ? 'timeout' : ($meta['eof'] ? 'closed' : 'unknown');
              $msg = 'Failed to send the data to server completely ';
              $msg .= '(SIZE:' . ($size - $len) . '/' . $size . ', REASON:' . $reason . ')';
              throw new XSException($msg);
          }
          return true;
      }

      /**
       * Reads exactly $len bytes from the stream.
       *
       * @param int $len number of bytes wanted
       * @return string the data ('' when $len is 0)
       * @throws XSException when the connection is unusable or ends early
       */
      private function read($len)
      {
          if ($len == 0)
              return '';
          $this->check();
          for ($buf = '', $size = $len;;)
          {
              $bytes = fread($this->_sock, $len);
              if ($bytes === false || strlen($bytes) == 0)
                  break;
              $len -= strlen($bytes);
              $buf .= $bytes;
              if ($len === 0)
                  return $buf;
          }
          // Fetch the stream state before close() invalidates the handle.
          $meta = stream_get_meta_data($this->_sock);
          $this->close(true);
          $reason = $meta['timed_out'] ? 'timeout' : ($meta['eof'] ? 'closed' : 'unknown');
          $msg = 'Failed to recv the data from server completely ';
          $msg .= '(SIZE:' . ($size - $len) . '/' . $size . ', REASON:' . $reason . ')';
          throw new XSException($msg);
      }

      /**
       * @throws XSException when there is no stream or it is flagged broken
       */
      private function check()
      {
          if ($this->_sock === null)
              throw new XSException('No server connection');
          if ($this->_flag & self::BROKEN)
              throw new XSException('Broken server connection');
      }

      /**
       * Opens the stream described by $this->_conn:
       *  - numeric value      => TCP port on localhost
       *  - "file://<path>"    => local file opened with mode 'wb' (sets FILE)
       *  - "<host>:<port>"    => TCP
       *  - anything else      => path of a unix domain socket
       *
       * @throws XSException when the file or socket cannot be opened
       */
      private function connect()
      {
          $conn = $this->_conn;
          if (is_int($conn) || is_numeric($conn))
          {
              $host = 'localhost';
              $port = intval($conn);
          }
          else if (!strncmp($conn, 'file://', 7))
          {
              $conn = substr($conn, 7);
              if (($sock = @fopen($conn, 'wb')) === false)
                  throw new XSException('Failed to open local file for writing: `' . $conn . '\'');
              $this->_flag |= self::FILE;
              $this->_sock = $sock;
              return;
          }
          else if (($pos = strpos($conn, ':')) !== false)
          {
              $host = substr($conn, 0, $pos);
              $port = intval(substr($conn, $pos + 1));
          }
          else
          {
              $host = 'unix://' . $conn;
              $port = -1;
          }
          if (($sock = @fsockopen($host, $port, $errno, $error, 5)) === false)
              throw new XSException($error . '(C#' . $errno . ')');
          // Stay just under the script time limit, but never drop to a
          // zero-second timeout (which happened for max_execution_time = 1).
          $timeout = ini_get('max_execution_time');
          $timeout = $timeout > 0 ? max(1, $timeout - 1) : 30;
          stream_set_blocking($sock, true);
          stream_set_timeout($sock, $timeout);
          $this->_sock = $sock;
      }
  }
  /**
   * Contract for tokenizers that split a value into a list of terms.
   */
  interface XSTokenizer
  {
      const DFL = 0;

      /**
       * @param string          $value text to tokenize
       * @param XSDocument|null $doc   optional document passed alongside the value
       * @return array list of tokens
       */
      public function getTokens($value, XSDocument $doc = null);
  }
  /**
   * Tokenizer that never yields any token.
   */
  class XSTokenizerNone implements XSTokenizer
  {
      /**
       * @param string          $value ignored
       * @param XSDocument|null $doc   ignored
       * @return array always an empty array
       */
      public function getTokens($value, XSDocument $doc = null)
      {
          $none = array();
          return $none;
      }
  }
  /**
   * Tokenizer that treats the whole value as one single token.
   */
  class XSTokenizerFull implements XSTokenizer
  {
      /**
       * @param string          $value text to tokenize
       * @param XSDocument|null $doc   ignored
       * @return array one-element array containing $value unchanged
       */
      public function getTokens($value, XSDocument $doc = null)
      {
          $tokens = array();
          $tokens[] = $value;
          return $tokens;
      }
  }
  /**
   * Tokenizer that splits the value on a fixed separator, or on a regular
   * expression when the argument is written as /.../ .
   */
  class XSTokenizerSplit implements XSTokenizer
  {
      private $arg = ' ';

      /**
       * @param string|null $arg separator or /regex/; null or '' keeps the
       *                         default single space
       */
      public function __construct($arg = null)
      {
          if ($arg === null || $arg === '')
              return;
          $this->arg = $arg;
      }

      /**
       * @param string          $value text to tokenize
       * @param XSDocument|null $doc   ignored
       * @return array pieces produced by preg_split() or explode()
       */
      public function getTokens($value, XSDocument $doc = null)
      {
          $sep = $this->arg;
          // Longer than two characters and enclosed in slashes => regex.
          $isRegex = strlen($sep) > 2 && substr($sep, 0, 1) == '/' && substr($sep, -1, 1) == '/';
          if ($isRegex)
              return preg_split($sep, $value);
          return explode($sep, $value);
      }
  }
  /**
   * Tokenizer that cuts the value into consecutive chunks of a fixed
   * byte length (the last chunk may be shorter).
   */
  class XSTokenizerXlen implements XSTokenizer
  {
      private $arg = 2;

      /**
       * @param int|string|null $arg chunk length, 1..255; null or '' keeps
       *                             the default of 2
       * @throws XSException when the length is out of range
       */
      public function __construct($arg = null)
      {
          if ($arg === null || $arg === '')
              return;

          $length = intval($arg);
          if ($length < 1 || $length > 255)
              throw new XSException('Invalid argument for ' . __CLASS__ . ': ' . $arg);
          $this->arg = $length;
      }

      /**
       * @param string          $value text to tokenize
       * @param XSDocument|null $doc   ignored
       * @return array the chunks in order; empty for an empty value
       */
      public function getTokens($value, XSDocument $doc = null)
      {
          $chunks = array();
          $step = $this->arg;
          $total = strlen($value);
          $pos = 0;
          while ($pos < $total)
          {
              $chunks[] = substr($value, $pos, $step);
              $pos += $step;
          }
          return $chunks;
      }
  }
  /**
   * Tokenizer that yields growing prefixes of the value: the first N
   * bytes, the first 2N bytes, and so on, ending with the whole value.
   */
  class XSTokenizerXstep implements XSTokenizer
  {
      private $arg = 2;

      /**
       * @param int|string|null $arg step length, 1..255; null or '' keeps
       *                             the default of 2
       * @throws XSException when the step is out of range
       */
      public function __construct($arg = null)
      {
          if ($arg !== null && $arg !== '')
          {
              $this->arg = intval($arg);
              if ($this->arg < 1 || $this->arg > 255)
                  throw new XSException('Invalid argument for ' . __CLASS__ . ': ' . $arg);
          }
      }

      /**
       * @param string          $value text to tokenize
       * @param XSDocument|null $doc   ignored
       * @return array prefixes of increasing length; empty for an empty value
       */
      public function getTokens($value, XSDocument $doc = null)
      {
          $terms = array();
          $length = strlen($value);
          // An empty value has no prefixes. Without this guard the loop
          // emitted one bogus empty token, unlike XSTokenizerXlen, which
          // yields none.
          if ($length === 0)
              return $terms;

          for ($end = $this->arg;; $end += $this->arg)
          {
              $terms[] = substr($value, 0, $end);
              if ($end >= $length)
                  break;
          }
          return $terms;
      }
  }
  /**
   * Tokenizer that delegates word segmentation to the server through
   * CMD_SEARCH_SCWS_* commands. All instances share one server connection.
   */
  class XSTokenizerScws implements XSTokenizer
  {
      const MULTI_MASK = 15;

      private $_charset;
      private $_setting = array();
      private static $_server;
      // Default charset captured when the shared server is created, so
      // that every later instance starts with it as well.
      private static $_defaultCharset;

      /**
       * Creates the shared server connection on first use and defines the
       * SCWS_MULTI_* constants when they are missing.
       *
       * @throws XSException when no XS instance has been created yet
       */
      public function __construct()
      {
          if (self::$_server === null)
          {
              $xs = XS::getLastXS();
              if ($xs === null)
                  throw new XSException('An XS instance should be created before using ' . __CLASS__);
              self::$_server = $xs->getScwsServer();
              self::$_defaultCharset = $xs->getDefaultCharset();
              self::$_server->setTimeout(0);
              if (!defined('SCWS_MULTI_NONE'))
              {
                  define('SCWS_MULTI_NONE', 0);
                  define('SCWS_MULTI_SHORT', 1);
                  define('SCWS_MULTI_DUALITY', 2);
                  define('SCWS_MULTI_ZMAIN', 4);
                  define('SCWS_MULTI_ZALL', 8);
              }
          }
          // Previously the charset was assigned only inside the branch above,
          // leaving it null for every instance but the first.
          $this->_charset = self::$_defaultCharset;
      }

      /**
       * Tokenizes a value, treating it as UTF-8 regardless of the charset
       * configured on this instance. Also turns the 'ignore' setting on.
       *
       * @param string          $value text to tokenize
       * @param XSDocument|null $doc   ignored
       * @return array list of words
       * @throws XSException on a server or I/O error
       */
      public function getTokens($value, XSDocument $doc = null)
      {
          $tokens = array();
          $this->setIgnore(true);
          $_charset = $this->_charset;
          $this->_charset = 'UTF-8';
          try
          {
              $words = $this->getResult($value);
          }
          catch (Exception $e)
          {
              // Do not leave the temporary charset in place on failure.
              $this->_charset = $_charset;
              throw $e;
          }
          $this->_charset = $_charset;
          foreach ($words as $word)
          {
              $tokens[] = $word['word'];
          }
          return $tokens;
      }

      /**
       * Sets the charset of input text and returned words.
       *
       * @param string $charset charset name; 'UTF8' is normalized to 'UTF-8'
       * @return XSTokenizerScws $this
       */
      public function setCharset($charset)
      {
          $this->_charset = strtoupper($charset);
          if ($this->_charset == 'UTF8')
              $this->_charset = 'UTF-8';
          return $this;
      }

      /**
       * Queues the 'ignore' setting, sent with the next request.
       *
       * @param bool $yes anything but strict false enables it
       * @return XSTokenizerScws $this
       */
      public function setIgnore($yes = true)
      {
          $this->_setting['ignore'] = new XSCommand(CMD_SEARCH_SCWS_SET, CMD_SCWS_SET_IGNORE, $yes === false ? 0 : 1);
          return $this;
      }

      /**
       * Queues the 'multi' setting, sent with the next request.
       *
       * @param int $mode mode bits; masked with MULTI_MASK
       * @return XSTokenizerScws $this
       */
      public function setMulti($mode = 3)
      {
          $mode = intval($mode) & self::MULTI_MASK;
          $this->_setting['multi'] = new XSCommand(CMD_SEARCH_SCWS_SET, CMD_SCWS_SET_MULTI, $mode);
          return $this;
      }

      /**
       * Queues the 'duality' setting, sent with the next request.
       *
       * @param bool $yes anything but strict false enables it
       * @return XSTokenizerScws $this
       */
      public function setDuality($yes = true)
      {
          $this->_setting['duality'] = new XSCommand(CMD_SEARCH_SCWS_SET, CMD_SCWS_SET_DUALITY, $yes === false ? 0 : 1);
          return $this;
      }

      /**
       * @return string version text reported by the server
       * @throws XSException on a server or I/O error
       */
      public function getVersion()
      {
          $cmd = new XSCommand(CMD_SEARCH_SCWS_GET, CMD_SCWS_GET_VERSION);
          $res = self::$_server->execCommand($cmd, CMD_OK_INFO);
          return $res->buf;
      }

      /**
       * Segments the text into words.
       *
       * @param string $text text in this instance's charset
       * @return array list of arrays with keys 'off', 'attr' and 'word'
       * @throws XSException on a server or I/O error
       */
      public function getResult($text)
      {
          $words = array();
          $text = $this->applySetting($text);
          $cmd = new XSCommand(CMD_SEARCH_SCWS_GET, CMD_SCWS_GET_RESULT, 0, $text);
          $res = self::$_server->execCommand($cmd, CMD_OK_SCWS_RESULT);
          // One response per word; an empty buffer ends the list.
          while ($res->buf !== '')
          {
              $tmp = unpack('Ioff/a4attr/a*word', $res->buf);
              $tmp['word'] = XS::convert($tmp['word'], $this->_charset, 'UTF-8');
              $words[] = $tmp;
              $res = self::$_server->getRespond();
          }
          return $words;
      }

      /**
       * Requests the top words of the text from the server.
       *
       * @param string $text  text in this instance's charset
       * @param int    $limit sent as the command's second argument
       * @param string $xattr sent as the command's secondary buffer
       * @return array list of arrays with keys 'times', 'attr' and 'word'
       * @throws XSException on a server or I/O error
       */
      public function getTops($text, $limit = 10, $xattr = '')
      {
          $words = array();
          $text = $this->applySetting($text);
          $cmd = new XSCommand(CMD_SEARCH_SCWS_GET, CMD_SCWS_GET_TOPS, $limit, $text, $xattr);
          $res = self::$_server->execCommand($cmd, CMD_OK_SCWS_TOPS);
          // One response per word; an empty buffer ends the list.
          while ($res->buf !== '')
          {
              $tmp = unpack('Itimes/a4attr/a*word', $res->buf);
              $tmp['word'] = XS::convert($tmp['word'], $this->_charset, 'UTF-8');
              $words[] = $tmp;
              $res = self::$_server->getRespond();
          }
          return $words;
      }

      /**
       * Sends a CMD_SCWS_HAS_WORD request for the text.
       *
       * @param string $text  text in this instance's charset
       * @param string $xattr sent as the command's secondary buffer
       * @return bool true when the server answers 'OK'
       * @throws XSException on a server or I/O error
       */
      public function hasWord($text, $xattr)
      {
          $text = $this->applySetting($text);
          $cmd = new XSCommand(CMD_SEARCH_SCWS_GET, CMD_SCWS_HAS_WORD, 0, $text, $xattr);
          $res = self::$_server->execCommand($cmd, CMD_OK_INFO);
          return $res->buf === 'OK';
      }

      /**
       * Reopens the shared connection if needed, sends every queued
       * setting and converts the text to UTF-8 for the server.
       *
       * @param string $text text in this instance's charset
       * @return string the text converted to UTF-8
       */
      private function applySetting($text)
      {
          self::$_server->reopen();
          foreach ($this->_setting as $key => $cmd)
          {
              self::$_server->execCommand($cmd);
          }
          return XS::convert($text, 'UTF-8', $this->_charset);
      }
  }