爬虫与反爬虫的斗争:可知阅读平台——第二弹
数次尝试直接读取那个找不到的 RSA 私钥,均以失败告终(js 功力不够);又想 hook 相关函数,可惜还是不会写 js……
最终还是放弃了直接读取变量找rsa private key和hook的想法,目光这时锁定在了jsencrypt.js上,为什么呢?既然它都是加密解密,就一定会调用这个js,为何不从这个js直接入手,使用override插入我们的修改版js呢?
说干就干!(无奈js真的不会,只能复制粘贴了)
这是 Tampermonkey 脚本,负责获取下载地址和加密后的解密 key。
// ==UserScript==
// @name keledge
// @namespace http://tampermonkey.net/
// @version 0.1
// @grant GM_xmlhttpRequest
// @description try to take over the world!
// @author You
// @match m.keledge.com/*
// @grant GM_setClipboard
// @grant none
// ==/UserScript==
let allText = {};

/**
 * Wraps `open` on the given XMLHttpRequest prototype so that every request
 * whose URL contains "authorize" gets a `load` listener. The listener parses
 * the response body as JSON and stores its `Data.Url` field in `window.dnurl`.
 *
 * NOTE(review): `window.dnurl` is meant to be read by the patched
 * jsencrypt.js running in the page, so this script has to share the page's
 * `window`. The metadata block declares both `@grant none` and GM_* grants —
 * confirm which one the userscript manager honours.
 *
 * @param {object} xhrPrototype - Prototype whose `open` method is wrapped
 *   (normally `XMLHttpRequest.prototype`).
 * @returns {void}
 */
function hookAuthorizeRequests(xhrPrototype) {
  const originalOpen = xhrPrototype.open;

  xhrPrototype.open = function (...openArgs) {
    // `open` also accepts a URL object; stringify before searching so the
    // hook never throws inside the page's own call.
    const requestUrl = String(openArgs[1]);

    if (requestUrl.includes("authorize")) {
      console.log("Authorize");
      this.addEventListener("load", function () {
        let payload;
        try {
          payload = JSON.parse(this.responseText);
        } catch (err) {
          // Non-JSON body (or a non-text responseType): nothing to capture.
          console.warn("keledge: could not parse authorize response", err);
          return;
        }
        if (!payload || !payload.Data) {
          console.warn("keledge: authorize response has no Data field");
          return;
        }
        window.dnurl = payload.Data.Url;
      }, false);
    }
    /* Disabled: capture the book title from "Detail" responses.
    if (requestUrl.includes("Detail")) {
      console.log("Detail");
      this.addEventListener("load", function () {
        let responseOBJ = JSON.parse(this.responseText);
        window.title = responseOBJ.Data.Title;
      }, false);
    }*/

    // Forward the exact argument list so an omitted `async` flag stays omitted.
    return originalOpen.apply(this, openArgs);
  };
}

// Install the hook only where XMLHttpRequest exists.
if (typeof XMLHttpRequest !== "undefined") {
  hookAuthorizeRequests(XMLHttpRequest.prototype);
}
这是jsencrypt.js的插入片段
// Patched replacement for RSAKey.prototype.decrypt: performs the normal
// decryption steps, then — when the page URL identifies an Epub resource —
// triggers a browser download of a small JSON file containing the title, the
// base64-encoded decrypted value and the URL stored in window.dnurl.
// NOTE(review): parseBigInt, doPrivate and pkcs1unpad2 are defined elsewhere
// in jsencrypt.js; their exact semantics are not visible in this fragment.
RSAKey.prototype.decrypt = function (ctext) {
// Parse the ciphertext with radix 16 (presumably a hex string — confirm).
var c = parseBigInt(ctext, 16);
var m = this.doPrivate(c);
if (m == null) {
return null;
}
// Unpad using the modulus length in bytes (bit length rounded up to bytes).
var s = pkcs1unpad2(m, (this.n.bitLength() + 7) >> 3);
//alert(window.btoa(s));
// Take the value of the SECOND query-string parameter, split it on "-" and
// check whether its last segment is "Epub".
// NOTE(review): this throws if the URL has no "?" or fewer than two
// "&"-separated parameters, because the intermediate split results are undefined.
if (window.location.href.split("?")[1].split("&")[1].split("=")[1].split("-").reverse()[0] == "Epub"){
// Disabled alternative: fetch window.dnurl directly and save it as an .epub
// whose file name embeds the base64 key.
/* fetch(window.dnurl).then(res => res.blob().then(blob => {
var a = document.createElement('a');
var url = window.URL.createObjectURL(blob);
var filename = window.location.href.split("?")[1].split("&")[1].split("=")[1]+"-"+decodeURI(window.location.href.split("?")[1].split("&")[2].split("=")[1])+"_"+window.btoa(s)+".epub";
a.href = url;
a.download = filename;
a.click();
window.URL.revokeObjectURL(url);
}))*/
// title: decoded value of the THIRD query parameter; key: base64 of the
// decrypted string; url: whatever is currently stored in window.dnurl.
var b = {"title":decodeURI(window.location.href.split("?")[1].split("&")[2].split("=")[1]),"key":window.btoa(s),"url":window.dnurl};
var text = JSON.stringify(b)
// The download is named after the raw value of the second query parameter.
var filename = window.location.href.split("?")[1].split("&")[1].split("=")[1];
// Build an off-document anchor with a data: URI and "click" it to start the download.
var pom = document.createElement('a');
pom.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text));
pom.setAttribute('download', filename);
// Dispatch a synthetic click event when document.createEvent exists,
// otherwise fall back to the element's click() method.
if (document.createEvent) {
var event = document.createEvent('MouseEvents');
event.initEvent('click', true, true);
pom.dispatchEvent(event);
}
else {
pom.click();
}
}
// NOTE(review): the fragment ends here — the statement returning `s` and the
// closing `};` of the function are not shown in this snippet.
大概执行流程就是网页会先抓取authorize,被tampermonkey捕捉并得到url,接着会下载第一页资源,并调用jsencrypt.js解密,此时被我们修改的jsencrypt直接捕获解密明文key,再调用浏览器下载就万事大吉啦!
这时大部分的epub都能下载啦,但是有部分epub下载失败,pdf资源抓取相对麻烦,就懒得写啦~