node.js爬取知乎,哔哩哔哩排行榜-技术鸭的文章论坛-前端交流-技术鸭(jishuya.cn)

Node.js 爬取知乎、哔哩哔哩排行榜

上代码

const cheerio = require("cheerio");
const superagent = require("superagent");
let http = require('http')
let https = require('https')
let url = require('url')
const axios = require("axios");
const request = require('request');
const fs = require('fs');

/**
 * HTTP server exposing the scraped rankings as JSON.
 *
 * Routes:
 *   /api/zhihu    - payload produced by httpsGit
 *   /api/bilibili - payload produced by Apibilibili
 *   anything else - body "404" with HTTP status 404
 *
 * Every response carries a JSON content type and a wildcard CORS header.
 */
const app = http.createServer((req, res) => {
    const headers = {
        "Content-Type": "application/json;charset=utf-8",
        "access-control-allow-origin": "*" // allow requests from any origin
    };
    // The status line is written only once the outcome is known, so an
    // unknown route is not reported as 200.
    const send = (status, body) => {
        res.writeHead(status, headers);
        res.end(body);
    };

    let pathname;
    try {
        // req.url holds only path + query; a placeholder origin lets the
        // WHATWG URL parser handle it.
        ({ pathname } = new URL(req.url, 'http://localhost'));
    } catch (err) {
        // Request targets such as "//" cannot be parsed into a URL.
        send(400, '400');
        return;
    }
    console.log(`${req.method} ${pathname}`);

    switch (pathname) {
        case '/api/zhihu':
            httpsGit((data) => send(200, data));
            break;
        case '/api/bilibili':
            Apibilibili((data) => send(200, data));
            break;
        default:
            send(404, '404');
            break;
    }
});
app.listen(8080, () => {
    console.log('localhost:8080');
});
/**
 * Fetches the Zhihu billboard page, extracts every `a.HotList-item` entry
 * and passes the result to `response` as a tab-indented JSON string.
 *
 * Each entry has the shape `{ title, excerpt, picture_url }`; `picture_url`
 * is the string "null" when the entry contains no image.
 *
 * If the page cannot be fetched or parsed, `response` receives a JSON
 * object with an `error` field instead, so the caller is always answered
 * exactly once.
 *
 * @param {(data: string) => void} response - Receives the JSON payload.
 */
function httpsGit(response) {
    axios.get('https://www.zhihu.com/billboard')
        .then((page) => {
            const $ = cheerio.load(page.data);
            const hotList = $("a.HotList-item")
                .map((index, element) => {
                    const item = $(element);
                    // Read the attribute from the parsed DOM instead of
                    // regex-matching the serialized HTML, which depended on
                    // attribute order and matched greedily.
                    const src = item.find("img").attr("src");
                    return {
                        title: item.find(".HotList-itemTitle").text(),
                        // NOTE(review): the previous code reused the title
                        // selector here, so excerpt always equalled title.
                        // `.HotList-itemExcerpt` is presumably the summary
                        // element - confirm against the live markup.
                        excerpt: item.find(".HotList-itemExcerpt").text(),
                        picture_url: src === undefined ? "null" : src.trim()
                    };
                })
                .get();
            return JSON.stringify(hotList, null, "\t");
        })
        .catch((err) => {
            // Without this handler a failed fetch left the HTTP request
            // hanging and produced an unhandled rejection.
            console.error('zhihu billboard fetch failed:', err);
            return JSON.stringify({ error: 'failed to fetch zhihu billboard' });
        })
        .then(response);
}

/**
 * Downloads the Bilibili ranking page
 * (https://www.bilibili.com/v/popular/rank/all), extracts the ranking list
 * with getRankList and passes it to `response` as a JSON string.
 *
 * If the download or the extraction fails, `response` receives a JSON
 * object with an `error` field instead of the process crashing inside the
 * request callback.
 *
 * @param {(data: string) => void} response - Receives the JSON payload.
 */
function Apibilibili(response) {
    const rankingURL = "https://www.bilibili.com" + "/v/popular/rank/all";
    const options = {
        headers: {
            // Browser-like user agent sent with the page request.
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36 Edg/100.0.1185.36"
        }
    };
    const fail = (cause) => {
        console.error('bilibili ranking fetch failed:', cause);
        response(JSON.stringify({ error: 'failed to fetch bilibili ranking' }));
    };

    request.get(rankingURL, options, (err, res, body) => {
        if (err) {
            // On a transport error `body` is undefined; do not try to parse it.
            fail(err);
            return;
        }
        let rankList;
        try {
            rankList = getRankList(body);
        } catch (parseErr) {
            fail(parseErr);
            return;
        }
        response(JSON.stringify(rankList));
    });
}

/**
 * Extracts the ranking array embedded in a page's source text.
 *
 * Looks for the first `"rankList":[ ... ]` array that is immediately
 * followed by a `"rankNote"` key and parses that array as JSON.
 *
 * @param {string} data - Raw page source to search.
 * @returns {Array} The parsed ranking entries, or an empty array when
 *   `data` is not a string or contains no ranking list.
 * @throws {SyntaxError} If the captured text is not valid JSON.
 */
function getRankList(data) {
    if (typeof data !== 'string') {
        return [];
    }
    // Non-greedy, anchored on `],"rankNote"`: a quote inside a JSON string
    // value is always escaped, so this delimiter cannot occur inside an
    // entry, and later mentions of "rankNote" no longer widen the match.
    const match = /"rankList":(\[[\s\S]*?\]),"rankNote"/.exec(data);
    if (match === null) {
        return [];
    }
    // JSON.parse decodes \u002F escapes to "/" by itself, so no manual
    // replacement is needed before parsing.
    return JSON.parse(match[1]);
}

记得先安装所需的依赖包(npm install cheerio superagent axios request),然后用 node 运行该文件启动项目,服务监听 8080 端口。

哔哩哔哩数据: 请求 http://localhost:8080/api/bilibili

9c2e4d5fe2221658

知乎:  请求 http://localhost:8080/api/zhihu

d0c9bfe612221738

 

请登录后发表评论

    请登录后查看回复内容