Quest.php 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. #!/usr/bin/env php
  2. <?php
  3. /**
  4. * Xunsearch PHP-SDK 搜索测试工具
  5. *
  6. * @author hightman
  7. * @link http://www.xunsearch.com/
  8. * @copyright Copyright &copy; 2011 HangZhou YunSheng Network Technology Co., Ltd.
  9. * @license http://www.xunsearch.com/license/
  10. * @version $Id$
  11. */
  12. require_once dirname(__FILE__) . '/../lib/XS.php';
  13. require_once dirname(__FILE__) . '/XSUtil.class.php';
  14. // check arguments
  15. XSUtil::parseOpt(array('p', 'q', 'c', 'd', 'project', 'query', 'db', 'limit', 'charset'));
  16. $project = XSUtil::getOpt('p', 'project', true);
  17. $query = XSUtil::getOpt('q', 'query', true);
  18. $hot = XSUtil::getOpt(null, 'hot');
  19. $synonyms = XSUtil::getOpt(null, 'list-synonyms');
  20. // magick output charset
  21. $charset = XSUtil::getOpt('c', 'charset');
  22. XSUtil::setCharset($charset);
  23. $query = XSUtil::convertIn($query);
  24. if (XSUtil::getOpt('h', 'help') !== null || !is_string($project)
  25. || (!$hot && !$synonyms && !is_string($query)))
  26. {
  27. $version = PACKAGE_NAME . '/' . PACKAGE_VERSION;
  28. echo <<<EOF
  29. Quest - 搜索查询和测试工具 ($version)
  30. 用法
  31. {$_SERVER['argv'][0]} [options] [-p|--project] <project> [[-q|--query] <query>]
  32. 选项说明
  33. --project=<name|ini>
  34. -p <project> 用于指定要搜索的项目名称或项目配置文件的路径,
  35. 如果指定的是名称,则使用 ../app/<name>.ini 作为配置文件
  36. --query=<query>
  37. -q <query> 指定要搜索的查询语句,如果语句中包含空格请用使用双引号包围起来
  38. --fuzzy 将搜索默认设为模糊搜索
  39. --synonym 开启自动同义词搜索功能
  40. --charset=<gbk|utf-8>
  41. -c <charset> 指定您当前在用的字符集,以便系统进行智能转换(默认:UTF-8)
  42. --db=<name[,name2 ...]>
  43. -d <db[,db2 ...]> 指定项目中的数据库名称,默认是名为 db 的库,多个库之间用逗号分隔
  44. --hot[=total|last|cur]
  45. 用于显示指定项目的热门搜索词,此时 <query> 参数无意义,可省略
  46. 其值含义分别表示总搜索量、上周搜索量、本周搜索量,默认为总搜索量。
  47. --suggest 根据当前搜索词展开常用搜索词建议,如查询“中”,即显示“中”开头的词
  48. --correct 根据当前搜索词进行同音、拼写纠错,输出更合适的关键词
  49. --related 根据当前搜索词查找相关搜索词
  50. --list-synonyms[=stemmed]
  51. 列出库内的全部同义词,每行显示一个,可以搭配 --limit 使用,默认显示前 100 个
  52. 如果设置了 stemmed 值则连同词根同义词也列出
  53. --limit=<num>用于设置 suggest|hot|related 的返回数量,两者默认值均为 10 个
  54. 对于普通搜索和列出同义词时,还支持用 --limit=offset,num 的格式
  55. --show-query 用于在搜索结果显示内部的 Xapian 结构的 query 语句用于调试
  56. -h|--help 显示帮助信息
  57. 若未指定 -p 或 -q 则会依次把附加的参数当作 <project> 和 <query> 处理,例:
  58. {$_SERVER['argv'][0]} <project> <query>
  59. {$_SERVER['argv'][0]} --hot <project>
  60. EOF;
  61. exit(0);
  62. }
  63. // create xs project
  64. $ini = file_exists($project) ? $project : dirname(__FILE__) . '/../app/' . $project . '.ini';
  65. if (!file_exists($ini))
  66. {
  67. echo "错误:无效的项目名称 ($project),不存在相应的配置文件。\n";
  68. exit(-1);
  69. }
  70. // execute the search
  71. try
  72. {
  73. // params
  74. $params = array('hot', 'suggest', 'correct', 'related', 'output', 'limit');
  75. foreach ($params as $_)
  76. {
  77. $$_ = XSUtil::getOpt(null, $_);
  78. }
  79. $limit1 = $limit === null ? 10 : intval($limit);
  80. $db = XSUtil::getOpt('d', 'db');
  81. // create xs object
  82. $xs = new XS($ini);
  83. $search = $xs->search;
  84. $search->setCharset('UTF-8');
  85. if ($db !== null)
  86. {
  87. $dbs = explode(',', $db);
  88. $search->setDb(trim($dbs[0]));
  89. for ($i = 1; $i < count($dbs); $i++)
  90. {
  91. $search->addDb(trim($dbs[$i]));
  92. }
  93. }
  94. if ($hot !== null)
  95. {
  96. $type = $hot === 'cur' ? 'currnum' : ($hot === 'last' ? 'lastnum' : 'total');
  97. $result = $search->getHotQuery($limit1, $type);
  98. if (count($result) === 0)
  99. echo "暂无相关热门搜索记录。\n";
  100. else
  101. {
  102. $i = 1;
  103. printf("序 %s %s\n%s\n", XSUtil::fixWidth('搜索关键词(' . $type . ')', 40), XSUtil::fixWidth('次数', 10), XSUtil::fixWidth('', 56, '-'));
  104. foreach ($result as $word => $freq)
  105. {
  106. printf("%2d. %s %d\n", $i, XSUtil::fixWidth($word, 40), $freq);
  107. $i++;
  108. }
  109. }
  110. }
  111. else if ($synonyms !== null)
  112. {
  113. if ($limit === null)
  114. $offset = $limit1 = 0;
  115. else if (($pos = strpos($limit, ',')) === false)
  116. $offset = 0;
  117. else
  118. {
  119. $limit1 = intval(substr($limit, $pos + 1));
  120. $offset = intval($limit);
  121. }
  122. $synonyms = $search->getAllSynonyms($limit1, $offset, $synonyms === 'stemmed');
  123. if (count($synonyms) == 0)
  124. {
  125. echo "暂无相关的同义词记录";
  126. if ($offset != 0)
  127. echo ",反正总数不超过 $offset 个";
  128. echo "。\n";
  129. }
  130. else
  131. {
  132. $i = $offset + 1;
  133. printf(" %s %s\n%s\n", XSUtil::fixWidth('原词', 32), '同义词', XSUtil::fixWidth('', 56, '-'));
  134. foreach ($synonyms as $raw => $list)
  135. {
  136. printf("%4d. %s %s\n", $i++, XSUtil::fixWidth($raw, 29), implode(", ", $list));
  137. }
  138. }
  139. }
  140. else if ($correct !== null)
  141. {
  142. $result = $search->getCorrectedQuery($query);
  143. if (count($result) === 0)
  144. {
  145. echo "目前对\033[7m" . $query . "\033[m还没有更好的修正方案。\n";
  146. }
  147. else
  148. {
  149. echo "您可以试试找:\033[4m" . implode("\033[m \033[4m", $result) . "\033[m\n";
  150. }
  151. }
  152. else if ($suggest !== null)
  153. {
  154. $result = $search->getExpandedQuery($query, $limit1);
  155. if (count($result) === 0)
  156. {
  157. echo "目前对\033[7m" . $query . "\033[m还没有任何搜索建议。\n";
  158. }
  159. else
  160. {
  161. echo "展开\033[7m" . $query . "\033[m得到以下搜索建议:\n";
  162. for ($i = 0; $i < count($result); $i++)
  163. {
  164. printf("%d. %s\n", $i + 1, $result[$i]);
  165. }
  166. }
  167. }
  168. else if ($related !== null)
  169. {
  170. $result = $search->getRelatedQuery($query, $limit1);
  171. if (count($result) === 0)
  172. {
  173. echo "目前还没有与\033[7m" . $query . "\033[m相关的搜索词。\n";
  174. }
  175. else
  176. {
  177. echo "与\033[7m" . $query . "\033[m相关的搜索词:\n";
  178. for ($i = 0; $i < count($result); $i++)
  179. {
  180. printf("%d. %s\n", $i + 1, $result[$i]);
  181. }
  182. }
  183. }
  184. else
  185. {
  186. // fuzzy search
  187. if (XSUtil::getOpt(null, 'fuzzy') !== null)
  188. $search->setFuzzy();
  189. if (XSUtil::getOpt(null, 'synonym') !== null)
  190. $search->setAutoSynonyms();
  191. if (($pos = strpos($limit, ',')) === false)
  192. $offset = 0;
  193. else
  194. {
  195. $limit1 = intval(substr($limit, $pos + 1));
  196. $offset = intval($limit);
  197. }
  198. // special fields
  199. $fid = $xs->getFieldId();
  200. $ftitle = $xs->getFieldTitle();
  201. $fbody = $xs->getFieldBody();
  202. if ($fbody)
  203. $xs->getFieldBody()->cutlen = 100;
  204. // preform search
  205. $begin = microtime(true);
  206. $result = $search->setQuery($query)->setLimit($limit1, $offset)->search();
  207. $cost = microtime(true) - $begin;
  208. $matched = $search->getLastCount();
  209. $total = $search->getDbTotal();
  210. // show query?
  211. if (XSUtil::getOpt(null, 'show-query') !== null)
  212. {
  213. echo str_repeat("-", 20) . "\n";
  214. echo "解析后的 QUERY 语句:" . $search->getQuery() . "\n";
  215. echo str_repeat("-", 20) . "\n";
  216. }
  217. // related & corrected
  218. $correct = $search->getCorrectedQuery();
  219. $related = $search->getRelatedQuery();
  220. // info
  221. printf("在 %s 条数据中,大约有 %d 条包含 \033[7m%s\033[m ,第 %d-%d 条,用时:%.4f 秒。\n", number_format($total), $matched, $query, min($matched, $offset + 1), min($matched, $limit1 + $offset), $cost);
  222. // correct
  223. if (count($correct) > 0)
  224. echo "您是不是想找:\033[4m" . implode("\033[m \033[4m", $correct) . "\033[m\n";
  225. // show result
  226. foreach ($result as $doc) /* @var $doc XSDocument */
  227. {
  228. // body & title
  229. $body = $title = '';
  230. if ($ftitle !== false)
  231. $title = cli_highlight($doc->f($ftitle));
  232. if ($fbody !== false)
  233. $body = cli_highlight($doc->f($fbody)) . "\n";
  234. // main fields
  235. printf("\n%d. \033[4m%s#%s# [%d%%]\033[m\n", $doc->rank(), $title, $doc->f($fid), $doc->percent());
  236. echo $body;
  237. // other fields
  238. $line = '';
  239. foreach ($xs->getAllFields() as $field) /* @var $field XSFieldMeta */
  240. {
  241. if ($field->isSpeical())
  242. continue;
  243. $tmp = ucfirst($field->name) . ':' . cli_highlight($doc->f($field));
  244. if ((strlen($tmp) + strlen($line)) > 80)
  245. {
  246. if (strlen($line) > 0)
  247. {
  248. echo $line . "\n";
  249. $line = '';
  250. }
  251. echo $tmp . "\n";
  252. }
  253. else
  254. {
  255. $line .= $tmp . ' ';
  256. }
  257. }
  258. if (strlen($line) > 0)
  259. echo $line . "\n";
  260. }
  261. // related
  262. if (count($related) > 0)
  263. echo "\n相关搜索:\033[4m" . implode("\033[m \033[4m", $related) . "\033[m\n";
  264. echo "\n";
  265. }
  266. }
  267. catch (XSException $e)
  268. {
  269. // Exception
  270. $start = dirname(dirname(__FILE__));
  271. $relative = XSException::getRelPath($start);
  272. $traceString = $e->getTraceAsString();
  273. $traceString = str_replace(dirname(__FILE__) . '/', '', $traceString);
  274. $traceString = str_replace($start . ($relative === '' ? '/' : ''), $relative, $traceString);
  275. echo $e . "\n" . $traceString . "\n";
  276. }
  277. // local highlight function
  278. function cli_highlight($str)
  279. {
  280. global $search;
  281. $str = $search->highlight($str);
  282. $str = preg_replace('#<em>(.+?)</em>#', "\033[7m\\1\033[m", $str) . ' ';
  283. $str = strtr($str, array('<em>' => '', '</em>' => ''));
  284. return $str;
  285. }