在网上找了很多资源,不是数据太旧、有问题,就是要收费☹,于是自己抓取了一下阿里的数据。
数据源
项目源码
数据都是异步获取写入,所以使用了 axios,node 版本是 12.10.0
引入需要的资源:
import http from 'axios'
import fs from 'fs'
地址和地址拼接函数:
// Common prefix of every area-data URL requested by this script.
const baseUrl = 'http://datavmap-public.oss-cn-hangzhou.aliyuncs.com/areas/'

/**
 * Build the URL of one area JSON file.
 *
 * @param {string} sort - Sub-directory under baseUrl (callers in this file pass 'children' or 'bound').
 * @param {string} code1 - Characters placed right after the fixed '51' prefix.
 * @param {string} [code2='00'] - Characters placed after code1.
 * @returns {string} baseUrl, sort, '/', then '51' + code1 + code2 + '.json'.
 */
function ug(sort, code1, code2 = '00') {
  const directory = baseUrl + sort
  const fileName = '51' + code1 + code2 + '.json'
  return directory + '/' + fileName
}
nodejs 里面异步接口都是回调函数,在这里不太好用,转换成基于 promise 的:
/**
 * Wrap a Node-style callback function so that it returns a Promise.
 *
 * The wrapper forwards the first `argsNum - 2` call arguments, then the
 * argument at index `argsNum - 2` (replaced by `{}` when it is falsy), and
 * finally an `(err, res)` callback that settles the Promise.
 *
 * @param {Function} fn - Function whose last parameter is an (err, res) callback.
 * @param {*} t - Value used as `this` when fn is called.
 * @param {number} [argsNum=3] - Total number of parameters of fn, callback included.
 * @returns {Function} Wrapper whose Promise rejects with err when err is truthy
 *   and resolves with res otherwise.
 */
function promisify(fn, t, argsNum = 3) {
  // Index of the argument that falls back to {} when omitted or falsy.
  const optionIndex = argsNum - 2

  return (...callArgs) => {
    const leading = callArgs.slice(0, optionIndex)
    const options = callArgs[optionIndex] || {}

    return new Promise((resolve, reject) => {
      const settle = (err, res) => {
        if (err) {
          reject(err)
        } else {
          resolve(res)
        }
      }
      fn.call(t, ...leading, options, settle)
    })
  }
}
// Promise-returning wrappers around the fs calls used by the crawlers.
// rmdir and mkdir use promisify's default three-parameter shape
// (path, options, callback).
const rmdir = promisify(fs.rmdir, fs)
const mkdir = promisify(fs.mkdir, fs)
// appendFile is wrapped with argsNum = 4: (path, data, options, callback).
const appendFile = promisify(fs.appendFile, fs, 4)
promise 更易用,代码可读性也高,但相应地性能会下降一些。bluebird 在性能上做过优化,比原生 promise 快一些;这里数据量比较小,就先自己实现一个。
有两种获取方式,并发获取:
// Integers 0..34; the concurrent crawler turns each one into the code1 part of a file name.
const children = Array.from({ length: 35 }, (_, index) => index)
// Integers 1..99 (0 is excluded); the concurrent crawler turns each one into the code2 part.
const bound = Array.from({ length: 99 }, (_, index) => index + 1)
/**
 * Download every children/bound JSON file concurrently and store it in ./map.
 *
 * All 35 children requests (code1 = 00..34) are started at once. As soon as one
 * of them has been handled, the 99 bound requests (code2 = 01..99) for the same
 * code1 are started together.
 *
 * @returns {Promise<void>} Resolves once every download has either been saved
 *   or skipped; it does not reject because individual failures are handled
 *   per file.
 */
async function getConcurrent() {
  // Clearing and creating the directory are guarded separately so that a failed
  // rmdir (for example because ./map does not exist yet) cannot skip mkdir.
  try {
    await rmdir('./map', {recursive: true})
  } catch (e) {
    // A missing directory is already the state we want; report anything else.
    if (e.code !== 'ENOENT') console.log(e)
  }
  try {
    await mkdir('./map', {recursive: true})
  } catch (e) {
    console.log(e)
  }

  // Fetch one URL and append the response body to `file`, logging both steps.
  const download = async (url, file, fetchedMsg, savedMsg) => {
    let r
    try {
      r = await http.get(url)
    } catch (e) {
      // Failed requests are skipped silently, as before: the code ranges are
      // probed without knowing which files actually exist.
      return
    }
    console.log(fetchedMsg)
    try {
      await appendFile(file, JSON.stringify(r.data))
      console.log(savedMsg)
    } catch (e) {
      // A write failure is a local problem, so it is reported instead of hidden.
      console.error('保存 ' + file + ' 失败', e)
    }
  }

  // Awaiting Promise.all keeps the returned promise pending until all work is done.
  await Promise.all(children.map(async (i) => {
    const code1 = i.toString().padStart(2, '0')
    await download(
      ug('children', code1),
      './map/51' + code1 + '00.json',
      '获取 children/51' + code1 + '00 成功',
      '保存 children/51' + code1 + '00 成功'
    )
    await Promise.all(bound.map((j) => {
      const code2 = j.toString().padStart(2, '0')
      return download(
        ug('bound', code1, code2),
        './map/51' + code1 + code2 + '.json',
        '### 获取 bound/51' + code1 + code2 + '成功',
        '### 保存 bound/51' + code1 + code2 + '成功'
      )
    }))
  }))
}
继发:
/**
 * Download every children/bound JSON file one after another and store it in ./map.
 *
 * For each code1 in 00..34 the children file is requested first, then the bound
 * files for code2 = 01..99; each request starts only after the previous file
 * has been saved or skipped.
 *
 * @returns {Promise<void>} Resolves after the last file has been handled; it
 *   does not reject because individual failures are handled per file.
 */
async function getSuccessive() {
  // Clearing and creating the directory are guarded separately so that a failed
  // rmdir (for example because ./map does not exist yet) cannot skip mkdir.
  try {
    await rmdir('./map', {recursive: true})
  } catch (e) {
    // A missing directory is already the state we want; report anything else.
    if (e.code !== 'ENOENT') console.log(e)
  }
  try {
    await mkdir('./map', {recursive: true})
  } catch (e) {
    console.log(e)
  }

  // Fetch one URL and append the response body to `file`, logging both steps.
  const download = async (url, file, fetchedMsg, savedMsg) => {
    let r
    try {
      r = await http.get(url)
    } catch (e) {
      // Failed requests are skipped silently, as before: the code ranges are
      // probed without knowing which files actually exist.
      return
    }
    console.log(fetchedMsg)
    try {
      await appendFile(file, JSON.stringify(r.data))
      console.log(savedMsg)
    } catch (e) {
      // A write failure is a local problem, so it is reported instead of hidden.
      console.error('保存 ' + file + ' 失败', e)
    }
  }

  for (let i = 0; i < 35; i++) {
    const code1 = i.toString().padStart(2, '0')
    await download(
      ug('children', code1),
      './map/51' + code1 + '00.json',
      '获取 children/51' + code1 + '00 成功',
      '保存 children/51' + code1 + '00 成功'
    )
    for (let j = 1; j < 100; j++) {
      const code2 = j.toString().padStart(2, '0')
      await download(
        ug('bound', code1, code2),
        './map/51' + code1 + code2 + '.json',
        '### 获取 bound/51' + code1 + code2 + '成功',
        '### 保存 bound/51' + code1 + code2 + '成功'
      )
    }
  }
}
并发明显比继发快很多
运行:
/**
 * Start the crawl.
 *
 * @param {boolean} [option=false] - Truthy selects getSuccessive(), falsy
 *   selects getConcurrent().
 * @returns {Promise<void>} The promise of the selected crawler, so callers can
 *   await completion.
 */
function run(option = false) {
  return option ? getSuccessive() : getConcurrent()
}

// Report an unexpected rejection instead of leaving the promise floating.
run().catch((e) => {
  console.error(e)
})
抓取其他地区的数据时,修改地址拼接函数里的编号前缀(这里是 51)和循环的编号范围就可以;后续会抓取国家地理信息网的数据。