三体动画在B站的真实评分究竟如何，来研究一段JS爬虫代码

研究一段贼有意思的JS爬虫代码，用于抓取B站作品网页真实评分数据，让可能存在的控分锁分行为现出原形。

观前提醒：本文所展示的代码来源于互联网，仅供学习研究使用。爬取的数据均采集自公开内容，代码仅用于进行实时的数据整合，以避免重复性人工劳动，不涉及商业机密内容。但由于此行为会增加B站服务器运行压力，故不建议恶意频繁使用。

关于三体动画

近期，备受期待的《三体》动画已经开播了一段时间。该动画改编自刘慈欣的同名长篇科幻小说《三体》，由高分动画《灵笼》的创作团队“艺画开天”制作，耗时近5年，网传投资2亿多，是B站近几年投入最多、量级最大的国创作品。再加上开播时B站官方铺天盖地的宣发，赚足了大家的眼球。12月10日，《三体》动画在B站开播，首日播放量就破亿。11月29日至12月11日的10个交易日，B站市值大涨353亿元。

但是随着前两集的放映，网络上对其制作质量出现了很多争议。随着第三至五集的播出，口碑更是急转直下，遭到网友群嘲，豆瓣评分持续下跌，截稿前已跌到5.1分^[1]。但由于存在某些神秘力量，B站评分却一直被锁定在8.2分。

爬虫脚本和使用方式

今天刷到了一个使用JS爬取B站真实评分数据的视频^[2]，原理是在浏览器控制台复制运行后，通过密集的网络请求获取响应数据，计算长评和短评的打分数据，然后在控制台打印出平均分。虽然最终计算结果可能会由于小数位数的取舍稍有偏差，但基本符合真实情况（前提是B站评分条数和星级数据是真实且无删改的）。OK话不多说，上代码^[3]：

// Every score collected so far. main() gathers short reviews first, then long
// reviews, so short-review scores come first in this array.
const allScore = [];
// Total number of reviews of each type as reported by the API (set by getScore).
let totalCount = {
    short: 0,
    long: 0,
};
// Progress-bar updater; assigned by beforeRender().
let render = null;
// Removes the progress dialog; assigned by beforeRender().
let rmDialog = null;
// Media id: the digits following "media/md" in the current page URL.
let mid;

try {
    // The id may be followed by "/", "?", "#" or the end of the URL, so a
    // trailing slash is not required (e.g. ".../media/md4315402?from=search").
    const matched = location.href.match(/media\/md(\d+)(?=[\/?#]|$)/);
    if (matched) {
        mid = matched[1];
    }
} catch (_) {
    // Reading the page URL failed; the check below reports it as "wrong page".
}

// Without a media id none of the API requests can be built, so stop here.
if (!mid) { throw new Error("未进入介绍详情页面") }

/**
 * Fetch one page of reviews for the current media id.
 *
 * @param {number|string|undefined} next - Cursor taken from the previous
 *   page's `next` field; a falsy value requests the first page.
 * @param {string} type - Review kind ('short' or 'long'); used both in the URL
 *   path and as the key into `totalCount`.
 * @returns {Promise<object>} The `data` field of the JSON response (callers
 *   read `list` and `next` from it).
 * @throws {Error} When the HTTP status is not OK, or the response reports a
 *   non-zero `code` / carries no `data`.
 */
async function getScore(next, type) {
    let url = `https://api.bilibili.com/pgc/review/${type}/list?media_id=${mid}&ps=12575&sort=0`;
    if (next) {
        url += `&cursor=${next}`;
    }

    const res = await fetch(url, { method: "GET" });
    if (!res.ok) {
        throw new Error(`请求评分接口失败: HTTP ${res.status}`);
    }

    const body = await res.json();
    // Fail with a readable message instead of a TypeError on `data.total`.
    if (!body || body.code !== 0 || !body.data) {
        throw new Error(`评分接口返回异常: ${body ? body.message : '空响应'}`);
    }

    const data = body.data;
    // Record the reported total once per review type; render() uses it as the denominator.
    if (totalCount[type] === 0) {
        totalCount[type] = data.total;
    }
    return data;
}

/**
 * Collect the scores of every review of the given type, page by page.
 *
 * Each page is requested exactly once: the first request carries no cursor,
 * and every later request uses the `next` value of the page before it. Paging
 * stops as soon as a page reports a falsy `next` (0 marks the last page), so a
 * result that fits in a single page is not fetched and counted a second time.
 *
 * @param {string} type - Review kind ('short' or 'long').
 * @returns {Promise<void>}
 */
async function scoreMain(type) {
    let cursor;
    do {
        const data = await getScore(cursor, type);
        handlerList(data.list);
        // Refresh the progress bar after every page, including the first one.
        render(type);
        cursor = data.next;
    } while (cursor);
}
/**
 * Compute the mean of all collected scores, log it, and show it on the page.
 *
 * The mean (one decimal place) is logged first so the result is available
 * even when the page elements this function updates cannot be found. If no
 * score was collected, only a notice is logged and the page is left untouched.
 */
function average() {
    if (allScore.length === 0) {
        console.log('平均分:', '暂无评分');
        return;
    }

    const total = allScore.reduce((sum, score) => sum + score, 0);
    const sf = (total / allScore.length).toFixed(1);
    console.log('平均分:', sf);

    const scoreDom = document.getElementsByClassName("media-info-score-content")[0];
    if (scoreDom) {
        scoreDom.innerText = sf;
    }

    const starsDom = document.getElementsByClassName("review-stars")[0];
    if (!starsDom) {
        return;
    }

    // Scores are out of 10 and there are five stars, so one star is worth 2 points.
    const starLc = Math.round(Number(sf) / 2);
    const starHc = 5 - starLc;

    // Replace the existing stars with freshly built ones.
    starsDom.innerHTML = '';
    for (let i = 0; i < starLc; i++) {
        const star = document.createElement('i');
        star.className = "icon-star icon-star-light";
        starsDom.appendChild(star);
    }
    // NOTE(review): the remaining stars use the "icon-star-half" class as in the
    // original script; confirm against the page CSS that this is the unlit style.
    for (let i = 0; i < starHc; i++) {
        const star = document.createElement('i');
        star.className = "icon-star icon-star-half";
        starsDom.appendChild(star);
    }
}
/**
 * Append the `score` of every review in `list` to the shared `allScore` array.
 *
 * @param {Array<object>} list - One page of review objects.
 */
function handlerList(list) {
    const pageScores = list.map(({ score }) => score);
    for (const pageScore of pageScores) {
        allScore.push(pageScore);
    }
}
/**
 * Build the full-screen progress dialog (one bar for short reviews, one for
 * long reviews), attach it to the page, and install the shared `render` and
 * `rmDialog` callbacks that update and remove it.
 */
function beforeRender() {
    // Create a <div>, apply the given inline styles, and append it to `parent`.
    function addDiv(parent, styles) {
        const el = document.createElement('div');
        Object.assign(el.style, styles);
        parent.appendChild(el);
        return el;
    }

    // Build one "label + bar" row and return the coloured fill element of its bar.
    function addProgressRow(parent, label) {
        const row = addDiv(parent, {
            width: '455px',
            height: '100px',
            display: 'flex',
            alignItems: 'center',
            justifyContent: 'center',
        });
        const caption = addDiv(row, {
            fontSize: '14px',
            color: '#333',
            marginRight: '16px',
        });
        caption.innerText = label;
        const track = addDiv(row, {
            width: '300px',
            height: '32px',
            background: '#eee',
            position: 'relative',
        });
        return addDiv(track, {
            position: 'absolute',
            left: '0',
            top: '0',
            width: '0%',
            height: '100%',
            background: '#ff85ad',
        });
    }

    // Share of `done` in `total` as a percentage clamped to [0, 100].
    // A total that is not a positive number means there is nothing to load,
    // so the bar is shown as complete instead of producing "NaN%"/"Infinity%".
    function toPercent(done, total) {
        if (!(total > 0)) {
            return 100;
        }
        return Math.min(100, Math.max(0, done * 100 / total));
    }

    // Dimmed overlay covering the whole viewport, centring its content.
    const dialog = addDiv(document.body, {
        position: 'fixed',
        width: '100%',
        height: '100%',
        background: 'rgba(0,0,0,.8)',
        top: '0',
        left: '0',
        zIndex: '999',
        display: 'flex',
        alignItems: 'center',
        justifyContent: 'center',
    });

    // White panel holding the two progress rows.
    const dialogContent = addDiv(dialog, {
        width: '455px',
        height: '200px',
        background: '#fff',
        borderRadius: '6px',
        padding: '51px 0',
    });

    const shortPrg = addProgressRow(dialogContent, '短评:');
    const longPrg = addProgressRow(dialogContent, '长评:');

    // Update the bar of the given review type from the shared counters.
    render = function (type) {
        if (type === 'long') {
            // allScore holds short-review scores first, so subtract their reported total.
            longPrg.style.width = toPercent(allScore.length - totalCount.short, totalCount.long) + '%';
        } else {
            shortPrg.style.width = toPercent(allScore.length, totalCount.short) + '%';
        }
    };

    // Detach the dialog; calling it again after removal is a no-op.
    rmDialog = function () {
        if (dialog.parentNode) {
            dialog.parentNode.removeChild(dialog);
        }
    };
}


/**
 * Entry point: show the progress dialog, collect short-review and then
 * long-review scores, and publish the average.
 *
 * The dialog is removed in a `finally` block so that a failed request does
 * not leave the full-screen overlay covering the page.
 *
 * @returns {Promise<void>} Rejects with the original error if any step fails.
 */
async function main() {
    beforeRender();
    try {
        console.log("--统计短评");
        await scoreMain('short');
        console.log("--统计长评");
        await scoreMain('long');
        average();
    } finally {
        rmDialog();
    }
}
// Report failures instead of leaving an unhandled promise rejection.
main().catch((err) => {
    console.error('评分统计失败:', err);
});

在浏览器打开哔哩哔哩-三体作品页面（其他影视作品也可以）
复制代码，在控制台粘贴，回车运行，等待进度条加载完毕即可。
统计结束，根据页面长短评公开数据，B站三体动画在2023年1月4日的评分约为 5.3分，而不是恒定不变的 8.2分。

代码详细解读

1. 获取作品媒体ID

定义一些变量，并获取作品媒体ID

// Array that will hold every collected score (short reviews first, then long reviews)
const allScore = [];
// Object holding the total number of short / long reviews reported by the API
let totalCount = {
    short: 0,
    long: 0,
};
// Progress-bar render function; assigned later by beforeRender()
let render = null;
// Function that removes the progress dialog; assigned later by beforeRender()
let rmDialog = null;
// Media id: the identifier that follows "media/md" in the page URL
let mid;
// try/catch guards the URL lookup so that a failure ends in the friendly error below
try {
    // match() with a regular expression extracts the media id from the address bar.
    // The id may be followed by "/", "?", "#" or the end of the URL, so a trailing
    // slash is not required (e.g. ".../media/md4315402?from=search").
    const matched = location.href.match(/media\/md(\d+)(?=[\/?#]|$)/);
    // match() returns null when nothing matches, so check before reading group 1
    if (matched) {
        mid = matched[1];
    }
} catch (_) {
    // Nothing is printed here; the check below reports the problem
}

// If no media id was found, the current page is not a usable detail page
if (!mid) { throw new Error("未进入介绍详情页面") }

下图为代码中正则表达式的可视化，便于辅助理解

2. 获取评分数据

// Async function that requests one page of reviews and returns its `data` field.
// Parameter next: cursor taken from the previous page (falsy for the first page);
// parameter type: 'short' or 'long'.
// Throws an Error when the HTTP status is not OK or the API reports a failure.
async function getScore(next, type) {
    // Request URL
    // media_id is the media id;
    // ps is presumably the page size, but the article observes that at most 30 items come back;
    // sort is presumably the sort order;
    let url = `https://api.bilibili.com/pgc/review/${type}/list?media_id=${mid}&ps=12575&sort=0`;
    // When next is set, append it as the cursor query parameter
    if (next) {
        url += `&cursor=${next}`;
    }
    // Send the GET request and make sure the HTTP status is a success
    const res = await fetch(url, { method: "GET" });
    if (!res.ok) {
        throw new Error(`请求评分接口失败: HTTP ${res.status}`);
    }
    // Parse the JSON body; fail with a readable message instead of a TypeError on data.total
    const body = await res.json();
    if (!body || body.code !== 0 || !body.data) {
        throw new Error(`评分接口返回异常: ${body ? body.message : '空响应'}`);
    }
    const data = body.data;
    // Record the reported total once per review type; render() uses it as the denominator
    if (totalCount[type] === 0) {
        totalCount[type] = data.total;
    }
    return data;
}

其中请求结果返回示例如下：

{
    // 请求状态码
    "code":0,
    // 返回数据
    "data":{
        // 评分列表
        "list":[
            // 第一条
            {"author":{"avatar":"http://i0.hdslb.com/bfs/face/1971e8f46f18f180e1a15a1be900136136662e35.jpg","mid":34862004,"uname":"FXGKT","vip":{"avatar_subscript_url":"","nickname_color":"","themeType":0,"vipStatus":0,"vipType":1},"vip_label":{"bg_color":"","bg_style":0,"border_color":"","label_theme":"","path":"","text":"","text_color":""}},"content":"太好看了！这简直太像了！像云天明留在地球的那一部分，太像了！","ctime":1672106551,"media_id":4315402,"mid":34862004,"mtime":1672106551,"progress":"看到第4话","review_id":43713473,"score":6,"stat":{"disliked":0,"liked":0,"likes":5}},
            // 第二条
            {"author":{"avatar":"http://i0.hdslb.com/bfs/face/116ad12be0dd5ee9cc23966ef9f772bafed22037.jpg","mid":37004880,"uname":"我突然爱吃海鲜","vip":{"avatar_subscript_url":"","nickname_color":"#FB7299","themeType":0,"vipStatus":1,"vipType":2},"vip_label":{"bg_color":"#FB7299","bg_style":1,"border_color":"","label_theme":"annual_vip","path":"","text":"年度大会员","text_color":"#FFFFFF"}},"content":"这是计划的一部分","ctime":1671345554,"media_id":4315402,"mid":37004880,"mtime":1671345554,"progress":"看到第5话","review_id":43368121,"score":2,"stat":{"disliked":0,"liked":0,"likes":1}}
            // 共有三十条，这里不再展示
        ],
        // next值将作为地址栏参数的光标值cursor
        "next":83142020284089,
        // 长评/短评总数
        "total":12858
    },
    // 请求返回信息，成功
    "message":"success"
}

3. 循环请求评分数据

// Async function that walks through every page of reviews and collects their scores.
// Parameter type is 'short' or 'long', i.e. short reviews or long reviews.
async function scoreMain(type) {
    // Cursor for the next request; undefined means "first page"
    let cursor;
    do {
        // Request one page, passing the cursor obtained from the previous page
        const data = await getScore(cursor, type);
        // Add the scores of this page to the score array
        handlerList(data.list);
        // Update the progress bar that belongs to this review type
        render(type);
        // Remember the cursor of the following page
        cursor = data.next;
        // A falsy next (0 marks the last page) ends the loop. Checking it right after
        // the first page too means a single-page result is not fetched and counted twice.
    } while (cursor);
}

4. 计算平均分并更新星星

// Function that computes the average score, prints it, and shows it on the page
function average() {
    // With no scores there is nothing to average; leave the page untouched
    if (allScore.length === 0) {
        console.log('平均分:', '暂无评分');
        return;
    }
    // reduce() adds up the score array to get the sum of all scores
    const total = allScore.reduce((sum, score) => sum + score, 0);
    // Sum divided by the number of scores is the average; keep one decimal place
    const sf = (total / allScore.length).toFixed(1);
    // Print the result first, so it is available even if the page elements are missing
    console.log('平均分:', sf);

    // Write the average into the score element of the page, if it exists
    const scoreDom = document.getElementsByClassName("media-info-score-content")[0];
    if (scoreDom) {
        scoreDom.innerText = sf;
    }

    // Look up the star container; without it there is nothing more to update
    const starsDom = document.getElementsByClassName("review-stars")[0];
    if (!starsDom) {
        return;
    }

    // 10 points correspond to five stars, so one star is worth 2 points.
    // Lit stars: half of the average, rounded to a whole number with round()
    // (there is no half-star case).
    const starLc = Math.round(Number(sf) / 2);
    // Remaining stars: five minus the number of lit stars
    const starHc = 5 - starLc;
    // Remove the stars currently shown
    starsDom.innerHTML = '';
    // Create the lit stars
    for (let i = 0; i < starLc; i++) {
        const star = document.createElement('i');
        star.className = "icon-star icon-star-light";
        starsDom.appendChild(star);
    }
    // Create the remaining stars.
    // NOTE(review): they use the "icon-star-half" class as in the original script;
    // confirm against the page CSS that this is the unlit style.
    for (let i = 0; i < starHc; i++) {
        const star = document.createElement('i');
        star.className = "icon-star icon-star-half";
        starsDom.appendChild(star);
    }
}

5. 创建对话框并渲染进度条

// Function that takes one page of reviews and appends each review's score to the score array
function handlerList(list) {
    // Pull the score out of every review of this page
    const pageScores = list.map(({ score }) => score);
    // Append the scores one by one to the shared array
    for (const pageScore of pageScores) {
        allScore.push(pageScore);
    }
}
// Function run before any progress is rendered: it builds the dialog with the two
// progress bars and installs the render / rmDialog callbacks
function beforeRender() {
    // Helper: create a div, apply the given inline styles, append it to parent
    function addDiv(parent, styles) {
        const el = document.createElement('div');
        Object.assign(el.style, styles);
        parent.appendChild(el);
        return el;
    }

    // Helper: build one "label + bar" row and return the pink fill element of its bar
    function addProgressRow(parent, label) {
        // Row container
        const row = addDiv(parent, {
            width: '455px',
            height: '100px',
            display: 'flex',
            alignItems: 'center',
            justifyContent: 'center',
        });
        // Text label of the row
        const caption = addDiv(row, {
            fontSize: '14px',
            color: '#333',
            marginRight: '16px',
        });
        caption.innerText = label;
        // Grey background of the progress bar
        const track = addDiv(row, {
            width: '300px',
            height: '32px',
            background: '#eee',
            position: 'relative',
        });
        // Pink fill of the progress bar; its width starts at 0% and is updated by render
        return addDiv(track, {
            position: 'absolute',
            left: '0',
            top: '0',
            width: '0%',
            height: '100%',
            background: '#ff85ad',
        });
    }

    // Helper: share of done in total as a percentage clamped to [0, 100].
    // A total that is not a positive number means there is nothing to load, so the
    // bar is shown as complete instead of producing "NaN%" / "Infinity%".
    function toPercent(done, total) {
        if (!(total > 0)) {
            return 100;
        }
        return Math.min(100, Math.max(0, done * 100 / total));
    }

    // Create the dialog overlay and append it to the end of the page body
    const dialog = addDiv(document.body, {
        position: 'fixed',
        width: '100%',
        height: '100%',
        background: 'rgba(0,0,0,.8)',
        top: '0',
        left: '0',
        zIndex: '999',
        display: 'flex',
        alignItems: 'center',
        justifyContent: 'center',
    });

    // Create the dialog content panel inside the overlay
    const dialogContent = addDiv(dialog, {
        width: '455px',
        height: '200px',
        background: '#fff',
        borderRadius: '6px',
        padding: '51px 0',
    });

    // Add the short-review row first, then the long-review row
    const shortPrg = addProgressRow(dialogContent, '短评:');
    const longPrg = addProgressRow(dialogContent, '长评:');

    // Function that updates the progress bar matching the given type
    render = function (type) {
        if (type === 'long') {
            // Long reviews: the score array holds the short-review scores first,
            // so subtract their reported total before computing the share
            longPrg.style.width = toPercent(allScore.length - totalCount.short, totalCount.long) + '%';
        } else {
            // Otherwise update the short-review bar
            shortPrg.style.width = toPercent(allScore.length, totalCount.short) + '%';
        }
    };

    // Function stored in rmDialog that removes the dialog; calling it again is a no-op
    rmDialog = function () {
        if (dialog.parentNode) {
            dialog.parentNode.removeChild(dialog);
        }
    };
}

6. 依次执行函数

// Entry point: runs the steps in order and always removes the dialog at the end
async function main() {
    beforeRender(); // add the progress dialog
    try {
        console.log("--统计短评");
        await scoreMain('short'); // collect the short-review scores into the score array
        console.log("--统计长评");
        await scoreMain('long'); // collect the long-review scores into the score array
        average(); // compute the average, update the stars, print the result
    } finally {
        // Runs on success and on failure, so a failed request does not leave the overlay on the page
        rmDialog(); // remove the dialog
    }
}
// Report failures instead of leaving an unhandled promise rejection
main().catch((err) => {
    console.error('评分统计失败:', err);
});

OK分析完毕，整体逻辑就是以上六步，仔细想想就会觉得非常简单，都是一些JS操作。

JS写的脚本也能叫做爬虫吗

一般我们看到爬虫俩字想到的是Python，那咱们这种JavaScript写的爬取页面数据的东西也能称之为爬虫吗？答案是肯定的，并且因为JS不用像Py那样需要提前安装环境，所以某些时候比Py爬取数据更加方便。

相比Python，JavaScript有三个优势^[11]：

JavaScript异步IO机制适用于爬虫这种IO密集型任务。JavaScript中的回调非常自然，使用异步网络请求能够充分利用CPU。
JavaScript中的jQuery毫无疑问是最强悍的HTML解析工具，使用JavaScript写爬虫能够减少学习负担和记忆负担。虽然Python中有PyQuery，但终究还是比不上jQuery自然。
爬取结果多为JSON，JavaScript是最适合处理JSON的语言。

【相关内容】：

[1] 三体 (2022) - 豆瓣评分

[2] 【技术】还三体动画一个公道！

[3] 【娱乐】我算出了三体动画的真实评分

[4] JavaScript try/catch/finally 语句 | 菜鸟教程：用于处理代码中可能出现的错误信息。

[5] JavaScript match() 方法 | 菜鸟教程：在字符串内检索指定的值，或找到一个或多个正则表达式的匹配。

[6] ES6 async 函数 | 菜鸟教程 async 是 ES7 才有的与异步操作有关的关键字，和 Promise ， Generator 有很大关联的。async 函数中可能会有 await 表达式，async 函数执行时，如果遇到 await 就会先暂停执行，等到触发的异步操作完成后，恢复 async 函数的执行并返回解析值。

[7] JavaScript reduce() 方法 | 菜鸟教程：接收一个函数作为累加器，数组中的每个值（从左到右）开始缩减，最终计算为一个值。

[8] JavaScript round() 方法 | 菜鸟教程：把一个数字舍入为最接近的整数。

[9] JavaScript parseInt() 函数 | 菜鸟教程：解析一个字符串，并返回一个整数。

[10] HTML DOM appendChild() 方法：向节点的子节点列表的末尾添加新的子节点。

[11] 知乎 - 浏览器js能不能做爬虫？ 爬虫，简单地说就是发一个请求，然后按一定逻辑解析获取到的数据。在Node环境下，可以用Request模块请求一个地址，得到返回信息，再用正则匹配数据，或者用Cheerio模块包装，方便定位相关的标签。在浏览器环境下也类似，可以用标签的src属性或Ajax请求一个地址，得到返回信息，再用正则匹配数据，或者用jQuery模块包装，方便定位相关的标签项。