| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494 | /* * @Descripttion: 广东材料信息价格获取(通过造价通接口) * @Author: vian * @Date: 2020-09-29 11:22:59 */module.exports = {  crawlData,};const uuidV1 = require('uuid/v1');const mongoose = require('mongoose');const axios = require('axios');const querystring = require('querystring');const priceInfoLibModel = mongoose.model('std_price_info_lib');const priceInfoClassModel = mongoose.model('std_price_info_class');const priceInfoAreaModel = mongoose.model('std_price_info_areas');const priceInfoItemModel = mongoose.model('std_price_info_items');const priceInfoSourceModel = mongoose.model('std_price_info_source');const gljLibModel = mongoose.model('std_glj_lib_map');const gljClassModel = mongoose.model('std_glj_lib_gljClass');// 造价通网页上整理的地区https://gd.zjtcn.com/gov/c_cs_d_t_p1.htmlconst areas = [  { city: '广州市', county: '广州市' },  { city: '广州市', county: '花都区' },  { city: '广州市', county: '增城市' },  { city: '广州市', county: '从化市' },  { city: '韶关市', county: '韶关市' },  { city: '韶关市', county: '始兴县' },  { city: '韶关市', county: '翁源县' },  { city: '韶关市', county: '新丰县' },  { city: '韶关市', county: '乐昌市' },  { city: '韶关市', county: '南雄市' },  { city: '深圳市', county: '深圳市' },  { city: '珠海市', county: '珠海市' },  { city: '汕头市', county: '汕头市' },  { city: '汕头市', county: '濠江区' },  { city: '汕头市', county: '潮阳区' },  { city: '汕头市', county: '潮南区' },  { city: '汕头市', county: '澄海区' },  { city: '汕头市', county: '南澳县' },  { city: '佛山市', county: '佛山市' },  { city: '佛山市', county: '南海区' },  { city: '佛山市', county: '顺德区' },  { city: '江门市', county: '江门市' },  { city: '江门市', county: '新会区' },  { city: '江门市', county: '台山市' },  { city: '江门市', county: '开平市' },  { city: '江门市', county: '鹤山市' },  { city: '江门市', county: '恩平市' },  { city: '湛江市', county: '湛江市' },  { city: '湛江市', county: '遂溪县' },  { city: '湛江市', county: '徐闻县' },  { city: '湛江市', county: '廉江市' },  { city: '湛江市', county: '雷州市' },  { city: '湛江市', county: '吴川市' },  { city: '茂名市', county: '茂名市' },  { city: '茂名市', county: '电白市' },  { city: '茂名市', county: '高州市' },  { city: '茂名市', county: '化州市' },  { city: '茂名市', county: '信宜市' },  { city: '肇庆市', county: '肇庆市' },  { city: '肇庆市', county: '鼎湖区' },  { city: '肇庆市', county: '广宁县' },  { city: '肇庆市', county: '怀集县' },  { city: '肇庆市', county: '封开县' },  { city: '肇庆市', county: '德庆县' },  { city: '肇庆市', county: '高要市' },  { city: '肇庆市', county: '四会市' },  { city: '惠州市', county: '惠州市' },  { city: '惠州市', county: '惠阳区' },  { city: '惠州市', county: '大亚湾开发区' },  { city: '惠州市', county: '博罗县' },  { city: '惠州市', county: '惠东县' },  { city: '惠州市', county: '龙门县' },  { city: '梅州市', county: '梅州市' },  { city: '梅州市', county: '梅县' },  { city: '梅州市', county: '大埔县' },  { city: '梅州市', county: '丰顺县' },  { city: '梅州市', county: '平远县' },  { city: '汕尾市', county: '汕尾市' },  { city: '汕尾市', county: '海丰县' },  { city: '汕尾市', county: '陆河县' },  { city: '河源市', county: '河源市' },  { city: '河源市', county: '紫金县' },  { city: '河源市', county: '龙川县' },  { city: '河源市', county: '连平县' },  { city: '河源市', county: '和平县' },  { city: '河源市', county: '东源县' },  { city: '阳江市', county: '阳江市' },  { city: '阳江市', county: '海陵岛区' },  { city: '阳江市', county: '阳西县' },  { city: '阳江市', county: '阳春市' },  { city: '清远市', county: '清远市' },  { city: '清远市', county: '佛冈县' },  { city: '清远市', county: '阳山县' },  { city: '清远市', county: '连山县' },  { city: '清远市', county: '连南县' },  { city: '清远市', county: '英德市' },  { city: '清远市', county: '连州市' },  { city: '东莞市', county: '东莞市' },  { city: '中山市', county: '中山市' },  { city: '潮州市', county: '潮州市' },  { city: '潮州市', county: '潮安县' },  { city: '潮州市', county: '饶平县' },  { city: '揭阳市', county: '揭阳市' },  { city: '揭阳市', county: '揭西县' },  { city: '揭阳市', county: '惠来县' },  { city: '揭阳市', county: '普宁市' },  { city: '云浮市', county: '云浮市' },  { city: '云浮市', county: '新兴县' },  { city: '云浮市', county: '郁南县' },  { city: '云浮市', county: '罗定市' },];const TIME_OUT = 120000;// 创建axios实例const axiosConfig = {  baseURL: 'http://api.zjtcn.com/user',  timeout: TIME_OUT,  /* proxy: {    host: "127.0.0.1", port: "8888" // Fiddler抓包,需要打开Fiddler否则会报connect error  }, */  headers: {    'Cache-Control': 'max-age=0',    'Content-Type': 'application/x-www-form-urlencoded',    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',    'Accept-Encoding': 'gzip, deflate',    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6',  },  //responseType: 'document'};const axiosInstance = axios.create(axiosConfig);console.log('axiosConfig=-========================================');console.log(axiosConfig);// 响应拦截器axiosInstance.interceptors.response.use(function (response) {  return response.data;}, function (error) {  // 对响应错误做点什么  if (error.message.includes('timeout')) {    return Promise.reject(`目标网络超时,请稍后再试。(${TIME_OUT}ms)`);  } else {    return Promise.reject(error);  }});async function post(url, data) {  if (url === '/dyn_code') {    console.log(axiosInstance);  }  return await axiosInstance.post(url, querystring.stringify(data));}// 有效期一年,通过购买造价通服务获得const SERVICE_ID = '2020090003';const SERVICE_KEY = '97F2A441633F10DFEB5BFC29B3862847';// 获取后续获取信息价期刊必要的tokenasync function getToken() {  // 获取动态码  const dynData = await post('/dyn_code', { service_id: SERVICE_ID });  if (!dynData.response_code) {    throw `错误代号${dynData.retCode} 获取动态码失败。`;  }  // 获取加密字符串  const serviceSecret = await post('/aes', { service_id: SERVICE_ID, service_key: SERVICE_KEY, service_code: dynData.response_code });  if (typeof serviceSecret !== 'string') {    throw `错误代号${serviceSecret.retCode} 获取加密字符串错误。`;  }  const tokenData = await post('/authentication', { service_id: SERVICE_ID, service_secret: serviceSecret });  if (!tokenData.token) {    throw `错误代号${tokenData.retCode} 获取token失败。`  }  return tokenData.token;}const monthMap = {  '1': '01月',  '2': '02月',  '3': '03月',  '4': '04月',  '5': '05月',  '6': '06月',  '7': '07月',  '8': '08月',  '9': '09月',  '10': '10月',  '11': '11月',  '12': '12月',};// 根据期数范围,获取期数数据function getPeriodData(from, to) {  if (from > to) {    return null;  }  const reg = /(\d+)-(\d+)/;  const fromMatch = from.match(reg);  const fromYear = +fromMatch[1];  const fromMonth = +fromMatch[2];  const toMatch = to.match(reg);  const toYear = +toMatch[1];  const toMonth = +toMatch[2];  let curYear = fromYear;  let curMonth = fromMonth;  const periods = [];  while (curYear <= toYear && curMonth <= toMonth) {    periods.push(`${curYear}年-${monthMap[curMonth]}`);    if (curMonth === 12) {      curYear++;      curMonth = 1;    } else {      curMonth++;    }  }  return periods;}// 根据期刊数据,获取需要信息价接口需要的datefunction getDateForApi(journalList, period) {  const monthPeriod = `${period}-05`; // 月度  const matchMonth = journalList.find(dateItem => dateItem.date === monthPeriod);  if (matchMonth) {    return matchMonth.date;  }  // 没匹配到月度数据,去匹配季度  const month = period.split('-')[1];  let quaterDate;  if (['1', '2', '3'].includes(month)) {    quaterDate = '03-15';  } else if (['4', '5', '6'].includes(month)) {    quaterDate = '06-15';  } else if (['7', '8', '9'].includes(month)) {    quaterDate = '09-15';  } else if (['10', '11', '12'].includes(month)) {    quaterDate = '12-15';  }  const year = period.split('-')[0];  const matchQuater = journalList.find(dateItem => dateItem.date === `${year}-${quaterDate}`);  if (matchQuater) {    return matchQuater.date;  }  // 没匹配到季度数据,去匹配半年数据  if (month / 6 <= 1 ) {    const firstHalfYear = journalList.find(dateItem => dateItem.date === `${year}-06-25`);    if (firstHalfYear) {      return firstHalfYear.date;    }  }  if (month /6 > 1) {    const secondHalfYear = journalList.find(dateItem => dateItem.date === `${year}-12-25`);    if (secondHalfYear) {      return secondHalfYear.date;    }  }  // 匹配全年数据  const fullYear = journalList.find(dateItem => dateItem.date === `${year}-12-30`);  if (fullYear) {    return fullYear;  }  return monthPeriod;}// 获取信息价async function getPriceInfoSource(token, period, city, county) {  const province = '广东';  const area = `${province}-${city}-${county}`;  const industry = 1;  /* const existData = await priceInfoSourceModel.find({ period, area, industry }).lean();  if (existData.length) {    return existData;  } */  const body = {    token,    province,    city,    county,    industry,    // date: `${period}-05` // 天数05表示请求月度数据  };  // 获取期刊数据  const year = period.split('-')[0];  const journalRst = await post('/gov/journal_list', { ...body, date: year });  const date = journalRst && journalRst.results ? getDateForApi(journalRst.results, period) : `${period}-05`;  const sourceData = await post('/gov/get', { ...body, date });  if (!sourceData.results) {    // 不抛出错误,不同地区更新信息价期刊的时间不同,如果导入数据时,有地区没发布数据,直接跳过并提示    return `retCode: ${sourceData.retCode} ${sourceData.msg} (${period} ${city} ${county})`;  }  // 因为造价通接口请求有次数限制,一个地区只能请求一次,为保险起见,将造价通源数据入库  const insertData = sourceData.results.map(item => ({    period,    area,    industry,    subcid: item.subcid,    code: item.code,    name: item.name,    unit: item.unit,    price: item.price,    taxPrice: item.tax_price,    noTaxPrice: item.no_tax_price,    specs: item.spec,    remark: item.notes,  }));  /* if (insertData.length) {    await priceInfoSourceModel.insertMany(insertData);  } */  return insertData;}// 获取数据subcid与分类名称的映射表async function getClassNameMap(compilationID) {  const gljLib = await gljLibModel.findOne({ compilationId: compilationID }).lean();  if (!gljLib) {    return null;  }  const classData = await gljClassModel.find({ repositoryId: gljLib.ID }).lean();  const map = {};  const reg = /^\d{4}/;  classData.forEach(item => {    const name = item.Name || '';    const matched = name.match(reg);    if (matched) {      map[matched[0]] = name;    }  });  return map;}/** * 将信息价源数据转换入库 * @param {String} compilationID - 费用定额ID * @param {String} period - 期数 eg: 2020年-09月 * @param {String} areaID - 地区ID * @param {Array} sourceData - 造价通源数据 * @param {Object} classNameMap - 从标准人材机分类树获取的编号-名称映射表 * @return {Void} */async function saveData(compilationID, period, areaID, sourceData, classNameMap) {  let lib = await priceInfoLibModel.findOne({ compilationID, period }).lean();  if (!lib) {    lib = {      compilationID,      period,      ID: uuidV1(),      name: `信息价(${period})`,      createDate: Date.now()    };    await priceInfoLibModel.insertMany([lib]);  }  const libID = lib.ID;  // 如果该期数该地区下存在数据,则不处理,防止重复插入数据  // 造价通地区数据更新不同步,可能需要多次导入数据补全一期数据,如果已经有数据,说明该地区已经导入成功过了,直接跳过  const existCount = await priceInfoItemModel.count({ compilationID, period, areaID });  if (existCount) {    return;  }  // 分类数据应为空才对,如果有就清空  const existClassCount = await priceInfoClassModel.count({ libID: lib.ID, areaID });  if (existClassCount) {    await priceInfoClassModel.remove({ libID: lib.ID, areaID });  }  // 导入分类数据及价格信息数据  const otherClassName = '其他';  const curClassMap = {    [otherClassName]: { libID, areaID, ID: uuidV1(), ParentID: '-1', name: otherClassName }  };  const classData = [];  const priceData = [];  const splitReg = /([0-9.]+)-([0-9.]+)/;  let needOtherClass = false;  sourceData.forEach(item => {    const className = classNameMap[item.subcid] || otherClassName;    if (className === otherClassName) {      needOtherClass = true;    }    if (!curClassMap[className]) {      const classItem = { libID, areaID, ID: uuidV1(), ParentID: '-1', NextSiblingID: '-1', name: className };      curClassMap[className] = classItem;      /* const preClassData = classData[classData.length - 1];      if (preClassData) {        preClassData.NextSiblingID = classItem.ID;      } */      classData.push(classItem);    }    const classID = curClassMap[className].ID;    const price = item.price || '';    const matchSplitPrice = price.match(splitReg);    if (matchSplitPrice) { // 价格字段是区间,需要分割成最低价、最高价两条数据      const minPrice = matchSplitPrice[1];      const maxPrice = matchSplitPrice[2];      priceData.push(transfromSourceItemToPriceItem(item, classID, `${item.name}-最低价`, minPrice));      priceData.push(transfromSourceItemToPriceItem(item, classID, `${item.name}-最高价`, maxPrice));    } else {      priceData.push(transfromSourceItemToPriceItem(item, classID, item.name, item.noTaxPrice));    }  });  // 分类按分类编号排序  classData.sort((a, b) => a.name.localeCompare(b.name));  classData.forEach((classItem, index) => {    const preClassItem = classData[index - 1];    if (preClassItem) {      preClassItem.NextSiblingID = classItem.ID;    }  });  if (needOtherClass) {    const otherClassItem = curClassMap[otherClassName];    otherClassItem.NextSiblingID = classData[0].ID;    classData.push(otherClassItem);  }  const task = [    priceInfoClassModel.insertMany(classData),    priceInfoItemModel.insertMany(priceData)  ];  await Promise.all(task);  // 将信息价源数据转换成信息价数据  function transfromSourceItemToPriceItem(sourceItem, classID, name, noTaxPrice) {    if (/(m³)|(m²)/.test(sourceItem.unit)) {      sourceItem.unit = sourceItem.unit.replace(/m³/g, 'm3');      sourceItem.unit = sourceItem.unit.replace(/m²/g, 'm2');    }    return {      compilationID,      period,      name,      noTaxPrice,      classID,      libID,      areaID,      ID: uuidV1(),      code: sourceItem.subcid,      unit: sourceItem.unit,      specs: sourceItem.specs,      taxPrice: sourceItem.taxPrice,      remark: sourceItem.remark,    };  }}/** * 获取数据 * @param {String} from - 从哪一期开始 eg: 2020-01 * @param {String} to - 从哪一期结束 eg: 2020-05 * @param {String} compilationID - 费用定额ID * @return {Object} */async function crawlData(from, to, compilationID) {  const token = await getToken();  const periods = getPeriodData(from, to);  if (!periods) {    throw '无效的期数区间。';  }  const classNameMap = await getClassNameMap(compilationID);  if (!classNameMap) {    throw '无有效的分类数据。';  }  const hintInfos = [];  for (const period of periods) {    const sourcePeriod = period.replace(/年|月/g, '');    for (let i = 0; i < areas.length; i++) {      // 存入地区      const { city, county } = areas[i];      const area = `${city}-${county}`;      let areaItem = await priceInfoAreaModel.findOne({ compilationID, name: area }).lean();      const serialNo = i + 1;      if (!areaItem) {        areaItem = { compilationID, serialNo, ID: uuidV1(), name: area };        await priceInfoAreaModel.insertMany([areaItem]);      } else { // 需求变更,需要排序        await priceInfoAreaModel.update({ ID: areaItem.ID }, { $set: { serialNo } });      }      // 存入信息价相关数据      const sourceData = await getPriceInfoSource(token, sourcePeriod, city, county);      if (typeof sourceData === 'string') {        hintInfos.push(sourceData);        continue;      }      if (!sourceData.length) {        continue;      }      await saveData(compilationID, period, areaItem.ID, sourceData, classNameMap);    }  }  if (hintInfos.length) {    throw hintInfos.join('\n');  }}
 |