|
@@ -6,7 +6,7 @@
|
|
|
*/
|
|
|
|
|
|
module.exports = {
|
|
|
- crawlData,
|
|
|
+ crawlData,
|
|
|
};
|
|
|
|
|
|
const axios = require('axios');
|
|
@@ -75,7 +75,6 @@ const defaultAreas = [
|
|
|
'潼南区',
|
|
|
'荣昌区1',
|
|
|
'荣昌区2',
|
|
|
- '武隆区',
|
|
|
'武隆区1',
|
|
|
'武隆区2',
|
|
|
'武隆区3',
|
|
@@ -94,7 +93,8 @@ const subAreaMap = {
|
|
|
'大足区',
|
|
|
'綦江区',
|
|
|
'南川区',
|
|
|
- '荣昌区',
|
|
|
+ '荣昌区1',
|
|
|
+ '荣昌区2',
|
|
|
'铜梁区',
|
|
|
'璧山区',
|
|
|
'潼南区',
|
|
@@ -116,27 +116,41 @@ const subAreaMap = {
|
|
|
],
|
|
|
'渝东南区': [
|
|
|
'黔江区',
|
|
|
- '武隆区',
|
|
|
+ '武隆区1',
|
|
|
+ '武隆区2',
|
|
|
+ '武隆区3',
|
|
|
+ '武隆区4',
|
|
|
+ '武隆区5',
|
|
|
+ '武隆区6',
|
|
|
'石柱县',
|
|
|
- '彭水县',
|
|
|
+ '彭水县1',
|
|
|
+ '彭水县2',
|
|
|
+ '彭水县3',
|
|
|
'酉阳县',
|
|
|
'秀山县',
|
|
|
],
|
|
|
}
|
|
|
|
|
|
+const TIME_OUT = 60000;
|
|
|
+
|
|
|
// 创建axios实例
|
|
|
const axiosInstance = axios.create({
|
|
|
- baseURL: 'http://www.cqsgczjxx.org/Service/MaterialPriceQuerySvr.svrx/',
|
|
|
+ baseURL: 'http://www.cqsgczjxx.org/',
|
|
|
timeout: TIME_OUT,
|
|
|
+/* proxy: {
|
|
|
+ host: "127.0.0.1", port: "8888" // Fiddler抓包,需要打开Fiddler否则会报connect error
|
|
|
+ }, */
|
|
|
headers: {
|
|
|
'Cache-Control': 'max-age=0',
|
|
|
- 'Content-Type': 'application/x-www-form-urlencoded',
|
|
|
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',
|
|
|
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
|
|
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
|
|
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
|
|
|
+ 'Accept': 'application/json, text/javascript, */*; q=0.01',
|
|
|
+ 'X-Requested-With': 'XMLHttpRequest',
|
|
|
'Accept-Encoding': 'gzip, deflate',
|
|
|
'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6',
|
|
|
+ // 'Cookie': 'ASP.NET_SessionId=uozdrp0hep5x344vq153muju'
|
|
|
},
|
|
|
- responseType: 'document'
|
|
|
+ // responseType: 'json'
|
|
|
});
|
|
|
|
|
|
// 响应拦截器
|
|
@@ -175,10 +189,52 @@ function month2quarter(period) {
|
|
|
return period;
|
|
|
}
|
|
|
|
|
|
/**
 * Promise-based delay: waits `time` milliseconds, optionally runs `handle`,
 * then settles.
 *
 * @param {Function|null} handle - Optional callback invoked once the delay elapses.
 *     Anything that is not a function is ignored.
 * @param {number} time - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) after the delay and after
 *     `handle` has run; rejects with the thrown error if `handle` throws.
 */
function setTimeoutSync(handle, time) {
    return new Promise((resolve, reject) => {
        setTimeout(() => {
            try {
                if (typeof handle === 'function') {
                    handle();
                }
                resolve();
            } catch (err) {
                // Without this, an exception thrown by `handle` escapes the timer
                // callback as an uncaught exception and the promise never settles.
                reject(err);
            }
        }, time);
    });
}
|
|
|
+
|
|
|
+// 目标网站做了反爬虫处理,请求需要携带cookie(sessionID),否则会报500错误
|
|
|
+let curCookie = '';
|
|
|
+
|
|
|
/**
 * Requests the site's index page and extracts a session cookie from the
 * response's `set-cookie` header, for use on later service requests.
 *
 * Only the leading `name=value` pair of the first cookie is kept; attributes
 * such as `path` or `HttpOnly` are dropped.
 *
 * @returns {Promise<string>} A `name=value` cookie string. Falls back to a
 *     hard-coded session id when the response carries no usable cookie.
 */
async function getCookie() {
    // Last-resort value kept from the original code.
    // NOTE(review): a hard-coded session id has presumably expired server-side — confirm it is still useful.
    const fallbackCookie = 'ASP.NET_SessionId=cbwzceh5pxzim13gyesho5af';
    // axios.get takes (url, config); the options must be the second argument.
    // Passing them third (after a null) meant they were silently ignored.
    const indexRes = await axiosInstance.get('/Pages/CQZJW/index.aspx', { responseType: 'document' });
    const headers = (indexRes && indexRes.headers) || {};
    const setCookie = headers['set-cookie'];
    // The header may be an array (possibly empty) or a single string.
    const firstCookie = Array.isArray(setCookie) ? setCookie[0] : setCookie;
    if (typeof firstCookie !== 'string') {
        return fallbackCookie;
    }
    return firstCookie.split(';')[0] || fallbackCookie;
}
|
|
|
+
|
|
|
/**
 * POSTs a form-encoded body to the material price service, attaching the
 * cached session cookie (fetched via getCookie() on first use and stored in
 * the module-level `curCookie`).
 *
 * When the response body is an HTML page rather than service data, the request
 * is re-sent after a short pause, up to `maxRetries` times.
 *
 * @param {string} url - Service method path, appended to `/Service/MaterialPriceQuerySvr.svrx`.
 * @param {Object} body - Form fields, serialised with querystring.stringify.
 * @param {number} [maxRetries=20] - Maximum number of re-sends after the first attempt.
 * @param {number} [retryDelay=500] - Pause between attempts, in milliseconds.
 * @returns {Promise<Object>} The last response received. If every attempt
 *     returned an HTML page, diagnostics are logged and that response is returned.
 */
async function post(url, body, maxRetries = 20, retryDelay = 500) {
    // Case-insensitive so that "<!DOCTYPE html>" is recognised as well.
    const htmlPageReg = /<!doctype html>/i;
    const isHtmlPage = res => Boolean(res) && typeof res.data === 'string' && htmlPageReg.test(res.data);

    const cookie = curCookie ? curCookie : await getCookie();
    curCookie = cookie;
    const extendConfig = { headers: { Cookie: cookie } };
    const serviceUrl = `/Service/MaterialPriceQuerySvr.svrx${url}`;
    // Serialise once; the payload is identical for every attempt.
    const payload = querystring.stringify(body);

    let res = await axiosInstance.post(serviceUrl, payload, extendConfig);
    // The request sometimes comes back as a redirect (302) page and has to be re-sent.
    // The loop is bounded so a persistently failing endpoint cannot hang the crawler.
    for (let retry = 0; retry < maxRetries && isHtmlPage(res); retry++) {
        await setTimeoutSync(null, retryDelay);
        res = await axiosInstance.post(serviceUrl, payload, extendConfig);
    }
    if (isHtmlPage(res)) {
        // Retries exhausted: dump the request and the HTML for troubleshooting.
        console.log(serviceUrl);
        console.log(body);
        console.log(res.data);
        console.log('==================================');
    }
    return res;
}
|
|
|
+
|
|
|
// 获取材料价格
|
|
|
async function queryPrice(period, area, groupType, classify) {
|
|
|
const body = {
|
|
|
- period,
|
|
|
+ period: period.replace('-', ''),
|
|
|
area: area || '',
|
|
|
groupType: groupType || '',
|
|
|
classify: classify || '',
|
|
@@ -189,22 +245,33 @@ async function queryPrice(period, area, groupType, classify) {
|
|
|
option: 0,
|
|
|
token: ''
|
|
|
};
|
|
|
- const res = await axiosInstance.post('/QueryInfoPrice', querystring.stringify(body));
|
|
|
- return res && res.Data && res.Data._Items || [];
|
|
|
+ const res = await post('/QueryInfoPrice', body);
|
|
|
+ return res && res.data && res.data.Data && res.data.Data._Items || [];
|
|
|
}
|
|
|
|
|
|
// 获取地区信息
|
|
|
async function queryArea(period, groupType) {
|
|
|
const body = {
|
|
|
groupType,
|
|
|
- period,
|
|
|
+ period: period.replace('-', ''),
|
|
|
token: ''
|
|
|
};
|
|
|
- const res = await axiosInstance.post('/QueryArea', querystring.stringify(body));
|
|
|
- const areaData = res && res.Data && res.Data._Items || [];
|
|
|
+ const res = await post('/QueryArea', body);
|
|
|
+ const areaData = res && res.data && res.data.Data && res.data.Data._Items || [];
|
|
|
return areaData.map(item => item.Area);
|
|
|
}
|
|
|
|
|
|
// Fetch the classification list for a price group and period.
// The first '-' in `period` is removed before the value is sent.
// Returns the `Data` field of the response body, or [] when it is absent.
async function queryKind(groupType, period) {
    const response = await post('/QueryKind', {
        groupType,
        period: period.replace('-', ''),
        token: '',
    });
    if (!response || !response.data) {
        return [];
    }
    return response.data.Data || [];
}
|
|
|
+
|
|
|
// 爬取人工价格
|
|
|
async function crawlLabour(period) {
|
|
|
const groupType = '人工信息价';
|
|
@@ -213,13 +280,14 @@ async function crawlLabour(period) {
|
|
|
const rst = [];
|
|
|
for (const rootArea of areas) {
|
|
|
const priceItems = await queryPrice(quater, rootArea, groupType);
|
|
|
+ priceItems.forEach(item => item.Unit = '工日');
|
|
|
const subAreas = subAreaMap[rootArea];
|
|
|
if (subAreas) {
|
|
|
subAreas.forEach(area => {
|
|
|
rst.push({ area, data: [{ classify: '人工', priceItems }] });
|
|
|
});
|
|
|
} else {
|
|
|
- rst.push({ rootArea, data: [{ classify: '人工', priceItems }] });
|
|
|
+ rst.push({ area: rootArea, data: [{ classify: '人工', priceItems }] });
|
|
|
}
|
|
|
}
|
|
|
return rst;
|
|
@@ -255,8 +323,8 @@ async function crawlBetonMaterial(period) {
|
|
|
async function crawlBuldingMaterial(period) {
|
|
|
const groupType = '建筑工程材料价格';
|
|
|
// 根据期数获取建安工程材料分类
|
|
|
- const { Data: kinds } = await axiosInstance.post('/QueryKind', querystring.stringify({ groupType, period, token: '' }));
|
|
|
- if (kinds || !kinds.length) {
|
|
|
+ const kinds = await queryKind(groupType, period);
|
|
|
+ if (!kinds || !kinds.length) {
|
|
|
return [];
|
|
|
}
|
|
|
const rst = [];
|
|
@@ -306,8 +374,8 @@ async function crawlGardenMateiral(period) {
|
|
|
const isDuplicate = duplicateReg.test(item.TaxPrice) || duplicateReg.test(item.NoTaxPrice);
|
|
|
if (isDuplicate) {
|
|
|
// 分成最高低价最高价数据
|
|
|
- const taxPriceList = item.TaxPrice.split('-');
|
|
|
- const noTaxPriceList = item.NoTaxPrice.split('-');
|
|
|
+ const taxPriceList = item.TaxPrice ? item.TaxPrice.split('-') : [''];
|
|
|
+ const noTaxPriceList = item.NoTaxPrice ? item.NoTaxPrice.split('-') : [''];
|
|
|
const minItem = {
|
|
|
...item,
|
|
|
Name: `${item.Name}-最低价`,
|
|
@@ -333,8 +401,8 @@ async function crawlGardenMateiral(period) {
|
|
|
async function crawlEnergyMateiral(period) {
|
|
|
const groupType = '绿色、节能建筑材料价格';
|
|
|
// 获取分类
|
|
|
- const { Data: kinds } = await axiosInstance.post('/QueryKind', querystring.stringify({ groupType, period, token: '' }));
|
|
|
- if (kinds || !kinds.length) {
|
|
|
+ const kinds = await queryKind(groupType, period);
|
|
|
+ if (!kinds || !kinds.length) {
|
|
|
return [];
|
|
|
}
|
|
|
const rootClass = { classify: '绿色、节能建筑工程材料', subClass: [] };
|
|
@@ -351,8 +419,8 @@ async function crawlPrefabricatedMateiral(period) {
|
|
|
const groupType = '装配式建筑材料价格';
|
|
|
// 获取分类
|
|
|
const quater = month2quarter(period);
|
|
|
- const { Data: kinds } = await axiosInstance.post('/QueryKind', querystring.stringify({ groupType, period: quater, token: '' }));
|
|
|
- if (kinds || !kinds.length) {
|
|
|
+ const kinds = await queryKind(groupType, quater);
|
|
|
+ if (!kinds || !kinds.length) {
|
|
|
return [];
|
|
|
}
|
|
|
const rootClass = { classify: '装配式建筑工程成品构件', subClass: [] };
|
|
@@ -369,8 +437,8 @@ async function crawlTrackMateiral(period) {
|
|
|
const groupType = '轨道材料价格';
|
|
|
// 获取分类
|
|
|
const quater = month2quarter(period);
|
|
|
- const { Data: kinds } = await axiosInstance.post('/QueryKind', querystring.stringify({ groupType, period: quater, token: '' }));
|
|
|
- if (kinds || !kinds.length) {
|
|
|
+ const kinds = await queryKind(groupType, quater);
|
|
|
+ if (!kinds || !kinds.length) {
|
|
|
return [];
|
|
|
}
|
|
|
const rootClass = { classify: '城市轨道交通工程材料', subClass: [] };
|
|
@@ -484,16 +552,16 @@ async function save(allData, period, compilationID) {
|
|
|
allData.forEach(({ area, data }) => {
|
|
|
(areaMap[area] || (areaMap[area] = [])).push(...data);
|
|
|
});
|
|
|
- const libData = { period, compilationID, ID: v1(), name: `信息价${period}`, createDate: Date.now() };
|
|
|
+ const libData = { period, compilationID, ID: v1(), name: `信息价(${period})`, createDate: Date.now() };
|
|
|
const curAreas = await priceInfoAreaModel.find({ compilationID }).sort({ serialNo: 1 }).lean();
|
|
|
- const maxSerialNo = curAreas.length ? curAreas[curAreas.length - 1].serialNo || 0 : 0;
|
|
|
+ let maxSerialNo = curAreas.length ? curAreas[curAreas.length - 1].serialNo || 0 : 0;
|
|
|
const areaData = [];
|
|
|
const classData = [];
|
|
|
const priceData = [];
|
|
|
for (const area in areaMap) {
|
|
|
let curArea = curAreas.find(cArea => cArea.name === area);
|
|
|
if (!curArea) {
|
|
|
- curArea = { compilationID, ID: v1(), serialNo: ++maxSerialNo };
|
|
|
+ curArea = { compilationID, ID: v1(), serialNo: ++maxSerialNo, name: area };
|
|
|
areaData.push(curArea);
|
|
|
}
|
|
|
const data = areaMap[area];
|
|
@@ -505,7 +573,9 @@ async function save(allData, period, compilationID) {
|
|
|
if (pre) {
|
|
|
pre.classObj.NextSiblingID = item.classObj.ID;
|
|
|
}
|
|
|
- priceData.push(...transformPriceItems(period, compilationID, libData.ID, curArea.ID, item.classObj.ID, item.priceItems));
|
|
|
+ if (item.priceItems) {
|
|
|
+ priceData.push(...transformPriceItems(period, compilationID, libData.ID, curArea.ID, item.classObj.ID, item.priceItems));
|
|
|
+ }
|
|
|
if (item.subClass && item.subClass.length) {
|
|
|
item.subClass.forEach((child, cIndex) => {
|
|
|
child.classObj = { libID: libData.ID, areaID: curArea.ID, ID: v1(), ParentID: item.classObj.ID, NextSiblingID: '-1', name: child.classify };
|
|
@@ -514,7 +584,9 @@ async function save(allData, period, compilationID) {
|
|
|
if (preChild) {
|
|
|
preChild.classObj.NextSiblingID = child.classObj.ID;
|
|
|
}
|
|
|
- priceData.push(...transformPriceItems(period, compilationID, libData.ID, curArea.ID, child.classObj.ID, child.priceItems));
|
|
|
+ if (child.priceItems) {
|
|
|
+ priceData.push(...transformPriceItems(period, compilationID, libData.ID, curArea.ID, child.classObj.ID, child.priceItems));
|
|
|
+ }
|
|
|
});
|
|
|
}
|
|
|
});
|