【九月打卡】第53天 TypeScript(9)
标签:
Typescript
TS编写爬虫工具
- 抓取 HTML 文本(通过 superagent 模块)
- 解析 HTML 节点内容并进行处理(通过 cheerio 模块,语法类似于 jQuery)
- 读取已有的数据文件,并将处理后的内容写入该文件
// 抓取html
import superagent from 'superagent';
// 获取节点元素
import * as cheerio from 'cheerio';
import fs from 'fs';
import path from 'path';
/** A single course entry extracted from one `.course-item` element. */
interface Course {
  /** Text of the item's first `.course-desc` node. */
  title: string;
  /** Integer parsed from the part after the colon in the second `.course-desc` node. */
  count: number;
}
/** One crawl snapshot: when it was taken and the courses found. */
interface CourseInfo {
  /** Millisecond timestamp at which the snapshot was produced. */
  time: number;
  /** Courses extracted from the page during this crawl. */
  data: Course[];
}
/**
 * Shape of the persisted JSON file: every crawl snapshot's course list,
 * keyed by that snapshot's timestamp.
 */
interface FileContent {
  [timestamp: number]: Course[];
}
/**
 * Crawls the demo course page, extracts the course list, and appends each
 * snapshot to `../dist/data.json` (resolved relative to this file), keyed by
 * the time the snapshot was taken.
 */
class Crowller {
  /** Value sent as the `secret` query parameter of the page URL. */
  private readonly secret = 'x3b174jsx';
  private readonly url = `http://www.dell-lee.com/typescript/demo.html?secret=${this.secret}`;

  /**
   * Downloads the page.
   *
   * @returns The response body as text.
   * @throws Whatever `superagent` rejects with when the request fails.
   */
  async getHtml(): Promise<string> {
    const res = await superagent.get(this.url);
    return res.text;
  }

  /**
   * Extracts one {@link Course} per `.course-item` element in the given HTML.
   *
   * @param html - Raw HTML of the course page.
   * @returns The extracted courses together with the current timestamp.
   *   A course whose count cannot be parsed gets `count: NaN`.
   */
  async getCourseInfo(html: string): Promise<CourseInfo> {
    const $ = cheerio.load(html);
    const courseInfos: Course[] = [];
    // `.each` rather than `.map`: the callback is run only for its side effect.
    $('.course-item').each((_index, ele) => {
      const descs = $(ele).find('.course-desc');
      const title = descs.eq(0).text();
      // Accept both the ASCII and the full-width colon as the label separator;
      // a missing separator falls back to '' so the count becomes NaN.
      const [, countText = ''] = descs.eq(1).text().split(/[::]/);
      const count = parseInt(countText, 10);
      courseInfos.push({ title, count });
    });
    return {
      time: Date.now(),
      data: courseInfos,
    };
  }

  /**
   * Merges a snapshot into the JSON data file, creating the file (and its
   * directory) when missing. Existing content that cannot be read, parsed, or
   * is not a plain JSON object is reported and replaced by a fresh object.
   *
   * @param courseInfo - Snapshot to store under its `time` key.
   * @throws If the directory or file cannot be written.
   */
  genJSON(courseInfo: CourseInfo): void {
    const filePath = path.resolve(__dirname, '../dist/data.json');
    let fileContent: FileContent = {};
    if (fs.existsSync(filePath)) {
      try {
        const parsed: unknown = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
        // Only a plain object can hold timestamp keys: `null` would throw on
        // assignment and an array would silently drop the entry on stringify.
        if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
          fileContent = parsed as FileContent;
        } else {
          console.log('无法解析');
        }
      } catch (e) {
        console.log('无法解析');
      }
    }
    fileContent[courseInfo.time] = courseInfo.data;
    // The output directory may not exist yet; writeFileSync would fail with ENOENT.
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    fs.writeFileSync(filePath, JSON.stringify(fileContent));
  }

  /** Runs one full crawl: download, extract, persist. */
  async init(): Promise<void> {
    const html = await this.getHtml();
    const courseInfo = await this.getCourseInfo(html);
    this.genJSON(courseInfo);
  }
}
// Entry point: run a single crawl. `init()` returns a promise, so a failure
// (network error, unwritable output file, ...) must be handled here; otherwise
// it surfaces as an unhandled promise rejection.
const crowller = new Crowller();
crowller.init().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
点击查看更多内容
为 TA 点赞
评论
共同学习,写下你的评论
评论加载中...
作者其他优质文章
正在加载中
感谢您的支持,我会继续努力的~
扫码打赏,你说多少就多少
赞赏金额会直接到老师账户
支付方式
打开微信扫一扫,即可进行扫码打赏哦