Bladeren bron

feat: 信息价相关

zhangweicheng 4 jaren geleden
bovenliggende
commit
bf919ef8e8
4 gewijzigde bestanden met toevoegingen van 95 en 28 verwijderingen
  1. 1 0
      modules/dict/custom.utf8
  2. 4 0
      modules/dict/jieba.js
  3. 88 27
      modules/main/facade/info_price_facade.js
  4. 2 1
      package.json

+ 1 - 0
modules/dict/custom.utf8

@@ -0,0 +1 @@
+硅酸盐水泥

+ 4 - 0
modules/dict/jieba.js

@@ -0,0 +1,4 @@
+const path = require('path');
+const nodejieba = require('nodejieba');
+nodejieba.load({ userDict: path.join(__dirname, '/custom.utf8') }); // user dictionary: custom.utf8 in this module's directory
+module.exports = nodejieba; // exported after load() has run

+ 88 - 27
modules/main/facade/info_price_facade.js

@@ -25,6 +25,13 @@ var segment = new Segment();
 // 使用默认的识别模块及字典,载入字典文件需要1秒,仅初始化时执行一次即可
 segment.useDefault();
 
+let nodejieba = require('../../dict/jieba');
+// Keyword sort weights for the fuzzy match: unlisted words sort as 1, listed ones as -weight (ascending), so '普通' (-99) ends up last
+let nameWeightMap ={ 
+  '普通':-99
+}
+
+
 
 async function getOptions(data,compilation){//data 是预留对象,暂时不用
   let compilationID = compilation._id;
@@ -127,15 +134,32 @@ async function getDataByCondition(data,compilation){
 async function getDataByCode(code, data) { 
   let condition = { ...data.condition };
   condition.code = code;
-  let totalSize = await infoItemsModel.find(condition).count();
+ /*  
+  2021-03-31: fetch by code first, then filter all matched results; they can no longer be categorized
+ let totalSize = await infoItemsModel.find(condition).count();
   if (data.lastID) { //a last row is present, so this is a next-page query
     condition["_id"] = {$gt:mongoose.Types.ObjectId(data.lastID)};
   }
   let items = [];
   if (totalSize > 0) { 
     items = await infoItemsModel.find(condition).lean().sort({"_id":1}).limit(50);
-  }
+  } */
    
+ //New requirement: load every item with this code, then keep only those whose name contains data.keyWord when any do --------- 
+  let  allItems = await infoItemsModel.find(condition).lean().sort({"_id":1});
+  let items = [];
+  if(data.keyWord && allItems.length > 1){
+    for(let item of allItems){
+      if(item.name.indexOf(data.keyWord) != -1) items.push(item) //the name contains the keyword, so there is no need to return everything under this code
+    }
+  }
+  //no name contained the keyword (or no filtering was done): return everything
+  if (items.length === 0) items = allItems;
+
+  let totalSize = items.length;
+
+  //End of new requirement --------- 
+
   return {totalSize,items}
 }
 
@@ -177,6 +201,52 @@ function handelThreeWord(word){
   }
 }
 
+/**
+ * Break long keywords into shorter ones for a second matching pass.
+ * Keywords of 5 or more characters are replaced by the pieces returned by
+ * nodejieba.cutSmall(word, 3); shorter keywords are kept unchanged, in order.
+ * @param {string[]} nameArray keywords to shorten
+ * @returns {string[]} a new array; nameArray itself is not modified
+ */
+function getShortNameArray(nameArray){
+  const isLong = (word) => word.length >= 5;
+  const pieces = Array.from(nameArray, (word) => isLong(word) ? nodejieba.cutSmall(word,3) : [word]);
+  return [].concat(...pieces);
+}
+
+/**
+ * Keep the price items that match the most keywords (substring test on name + specs).
+ * Side effect: every item gets `mstring` = name + specs with "混凝土" replaced by "砼".
+ * @param {Object[]} allInfoPrice items to search; `name` and `specs` are read from each
+ * @param {string[]} nameArray keywords to look for
+ * @param {boolean} [needHandleLongWord=true] whether to report unmatched long keywords
+ * @returns {{totalSize:number, items:Object[], handleLongWord:boolean}} handleLongWord is true when a keyword of 5+ characters exists and none of them matched any item
+ */
+function getMatchPrice(allInfoPrice,nameArray,needHandleLongWord = true){
+  let maxNum = 0;//highest number of matched keywords seen so far
+  const matchMap = {};//match count -> items with exactly that many matches
+  //stays true only while a long (5+ chars) keyword exists and none has matched yet
+  let handleLongWord = needHandleLongWord ? nameArray.some((na) => na.length >= 5) : false;
+  for (const info of allInfoPrice) {
+    const mstring = (info.name + info.specs).replace(/混凝土/g, "砼");
+    info.mstring = mstring;
+    let matchCount = 0;
+    for (const na of nameArray) {
+      if (mstring.indexOf(na) === -1) continue;
+      matchCount++;
+      //a long keyword matched as-is, so it does not need to be split and retried
+      if (needHandleLongWord && na.length >= 5) handleLongWord = false;
+    }
+    if (matchCount === 0) continue;
+    (matchMap[matchCount] = matchMap[matchCount] || []).push(info);
+    if (matchCount > maxNum) maxNum = matchCount;
+  }
+  const items = maxNum > 0 ? matchMap[maxNum] : [];
+  //must be a local binding: an undeclared assignment creates a global (and throws in strict mode)
+  const totalSize = items.length;
+  return {totalSize,items,handleLongWord};
+}
+
 //自定义特殊处理
 function cusSegment(nameArray,keyword){
   let temArr = [];
@@ -201,15 +271,12 @@ function cusSegment(nameArray,keyword){
 
 //模糊匹配
 async function getDataByFuzzyMatch(keyword, data){
-  let items = [];
+
   let nameArray = [];
   if (keyword.length < 3) {
     nameArray.push(keyword)
   } else { 
-    nameArray = segment.doSegment(keyword, {
-      simple: true, //不返回词性
-      stripPunctuation: true //去除标点符号
-    });
+    nameArray = nodejieba.cut(keyword)
   }
  
   //自定义处理
@@ -217,29 +284,23 @@ async function getDataByFuzzyMatch(keyword, data){
 
   console.log(nameArray);
 
-  let allInfoPrice = await infoItemsModel.find(data.condition).lean().sort({"_id":1});
 
-  let maxNum = 0;//最大匹配数
-  let matchMap = {};//匹配储存
+  let allInfoPrice = await infoItemsModel.find(data.condition).lean().sort({"_id":1});
 
-  for (let info of allInfoPrice) { 
-    //specs
-    let mstring = info.name + info.specs;
-    mstring = mstring.replace(/混凝土/g, "砼");
-    info.mstring = mstring;
-    let matchCount = 0;
-    for (let na of nameArray) { 
-      if (mstring.indexOf(na) != -1) { 
-        matchCount++;
-      }
-    }  
-    if (matchCount > 0) { 
-      matchMap[matchCount] ? matchMap[matchCount].push(info) : matchMap[matchCount] = [info];
-      if (matchCount > maxNum) maxNum = matchCount;
-    }
+  let {totalSize,items,handleLongWord}  = getMatchPrice(allInfoPrice,nameArray)
+  if(handleLongWord === true){
+    nameArray = getShortNameArray(nameArray);
+    console.log(`二次匹配:[${nameArray}]`);
+    let newResult  = getMatchPrice(allInfoPrice,nameArray,false)
+    totalSize = newResult.totalSize;
+    items = newResult.items;
   }
-  if (maxNum > 0) items = matchMap[maxNum];
-  totalSize = items.length
+
+  //关键词按权重排序,为了给结果排序
+  nameArray = _.sortBy(nameArray,(name)=>{
+    if(nameWeightMap[name]) return nameWeightMap[name]*-1;//sortBy是升序排序,我们要的是权重越小排到越后即倒序 所以这里乘以-1
+    return 1
+  })
 
   //按匹配位置排序 如[ '橡胶', '胶圈', '给水' ] 先显示橡胶
   items = _.sortBy(items,(item)=>{

+ 2 - 1
package.json

@@ -54,7 +54,8 @@
     "socket.io": "2.0.3",
     "ua-parser-js": "^0.7.14",
     "uuid": "^3.1.0",
-    "wiredep": "^4.0.0"
+    "wiredep": "^4.0.0",
+    "nodejieba": "^2.5.1"
   },
   "scripts": {
     "start": "node server.js",