2👍
✅
One way to solve this is to inspect the <script>
of the page in the page source around line 1050, which is where the charts are actually initialized. There’s a recurring pattern in the initialization of the charts: the canvas elements are queried one by one to get their contexts, and each query is followed by the variables that provide the labels and statistics of the chart.
This solution uses Node.js (a recent version is recommended) with the following modules:
- cheerio for querying elements in the DOM
- axios for sending an HTTP request to get the page source.
- abstract-syntax-tree to get a JavaScript object tree (AST) representation of the script that we wish to scrape.
Here’s the solution and the source code below:
const cheerio = require('cheerio');
const axios = require('axios');
const { parse, each, find } = require('abstract-syntax-tree');
/**
 * Read the static value of one element of an array literal in the parsed script.
 *
 * A signed numeric literal such as `-5` is parsed as a UnaryExpression that
 * wraps a Literal, so it is unwrapped here instead of being read as
 * `undefined`. Holes in the array (`[1, , 3]`) appear as `null` elements and
 * yield `undefined`. Every other node yields its `value` property, which is
 * the literal value for Literal nodes.
 *
 * @param {object|null} element - Entry of an ArrayExpression's `elements`.
 * @returns {*} The element's value, or `undefined` when none can be read.
 */
function readElementValue(element) {
  if (!element) {
    return undefined;
  }
  const isSignedNumber =
    element.type === 'UnaryExpression' &&
    (element.operator === '-' || element.operator === '+') &&
    element.argument.type === 'Literal' &&
    typeof element.argument.value === 'number';
  if (isSignedNumber) {
    return element.operator === '-'
      ? -element.argument.value
      : element.argument.value;
  }
  return element.value;
}

/**
 * Download the page at the hard-coded URL, locate the inline script that
 * mentions 'Chart.defaults', and collect the array literals declared after
 * each `ctx` variable into an object keyed by the string literal found in
 * that `ctx` declaration (the canvas ID). The result is logged to the console.
 *
 * For every key, the first array found is stored as-is; when a second array
 * is found, its elements become the property names and the first array's
 * elements become the corresponding values.
 *
 * @returns {Promise<void>} Resolves once the result has been logged.
 * @throws {Error} When no script containing 'Chart.defaults' is found, or
 *   when the request or the parsing fails.
 */
async function main() {
  // get the page source
  const { data } = await axios.get(
    'https://stats.warbrokers.io/players/i/5d2ead35d142affb05757778'
  );

  // load the page source with cheerio to query the elements
  const $ = cheerio.load(data);

  // get the script tag that contains the string 'Chart.defaults'
  const chartScript = $('script')
    .toArray()
    .map(script => $(script).html())
    .find(source => typeof source === 'string' && source.includes('Chart.defaults'));

  // fail with a clear message instead of handing `undefined` to the parser
  if (chartScript === undefined) {
    throw new Error(
      "Could not find a <script> containing 'Chart.defaults' in the page source"
    );
  }

  // convert the script content to an AST
  const ast = parse(chartScript);

  // we'll put all declarations in this object
  const declarations = {};

  // own-property check, so a key such as 'constructor' does not match
  // something inherited from Object.prototype
  const hasDeclaration = name =>
    Object.prototype.hasOwnProperty.call(declarations, name);

  // current key
  let key = null;

  // iterate over all variable declarations inside the script
  each(ast, 'VariableDeclaration', node => {
    // iterate over possible declarations, e.g. comma separated
    node.declarations.forEach(item => {
      // the key that will hold the statistics and their labels is the
      // ID of the canvas itself
      if (item.id.name === 'ctx') { // is this a canvas context variable?
        // get the only string literal that is not '2d'
        const literal = find(item, 'Literal').find(v => v.value !== '2d');
        if (literal) {
          // then assign it as the current key
          key = literal.value;
        }
      }

      // only array expressions that follow a `ctx` declaration are relevant
      if (!key || !item.init || item.init.type !== 'ArrayExpression') {
        return;
      }

      const array = item.init.elements.map(readElementValue);

      if (!hasDeclaration(key)) {
        // first array for this key: store the values
        declarations[key] = array;
        return;
      }

      // second array for this key: zip it with the stored values so that
      // its elements become the property names
      const stored = declarations[key];
      const result = {};
      array.forEach((name, index) => {
        result[name] = stored[index];
      });
      declarations[key] = result;

      // reset the key so that unrelated array expressions are not collected
      key = null;
    });
  });

  // logging it here, it's up to you how you deal with the data itself
  console.log(declarations);
}
// Run the scraper; report any failure and exit with a non-zero status
// instead of leaving the returned promise unhandled.
main().catch(error => {
  console.error(error);
  process.exitCode = 1;
});
Source:stackexchange.com