C
In recent days you have flooded the site with questions which show that
JS and the Web API are not yet well understood by you.
Meanwhile, you are trying to work with a complex tool, puppeteer. Alex, I second the advice you were given in https://ru.stackoverflow.com/a/1194798/2659. Alas, you lack knowledge of the technologies, that is, a technical outlook. As a consequence, you are solving the task in a hard, slow and poor way.
To back up this point, I looked at your code, at the website https://etfdb.com/ and at your attempts to extract the data from the DOM of the page (see /questions/tagged/puppeteer).
The page https://etfdb.com/screener/#page=1&structure=ETF&dividend_frequency=Monthly is not static: every "page turn" of the table makes an AJAX request to the server, receives the data and draws the table. Since there is an AJAX request and there is data, there is no point in jumping around DOM elements and tags. And the fact that you chose this approach tells those of us who follow your questions on ru.SO about the narrowness of your technical outlook.
Study the language and the Web API systematically, and read the documentation for the tools you need. There is no other way. Trying to assemble code from fragments,
borrowed from other people's examples without understanding how they work,
is a pointless exercise. Thank you for reading this far.
On the other hand, I appreciate your persistent attempts to understand! They are superficial, but persistent, so here is the solution. Look, the scenario is simple: Launch the browser. Log in by typing on the keyboard, as you were already advised.
Note: when requesting the login page I pass waitUntil: 'networkidle2' to wait for all the scripts to load. The form may be submitted in the classic way, or perhaps the authentication request is made by JS, in which case the scripts are needed. I did not look into it, but you can check!
In evaluate (i.e. inside the launched browser) make several AJAX requests that fetch the data from the server. The API returns pagination meta-information, including the page number and the total number of pages. Knowing the number of pages is very convenient because we can make exactly the required number of requests! Then return the entire collection back to the place where evaluate was called. Close the browser. Work with the data.
Environment: Node.js v14.13.0, puppeteer 5.3.1.
const puppeteer = require('puppeteer');
const { promisify } = require('util');
// Login credentials typed into the etfdb.com login form by spy().
// The ETFDB_LOGIN / ETFDB_PASSWORD environment variables take precedence so
// that secrets do not have to live in the source file; the literals below are
// kept only as a backward-compatible fallback.
// NOTE(review): consider removing the hard-coded fallback values entirely.
const credentials = {
  login: process.env.ETFDB_LOGIN || 'alexyavorskiy2005-0711@mail.ru',
  password: process.env.ETFDB_PASSWORD || 'alex07112005',
};

// Entry point: run the scraper once and turn its outcome into the process
// exit code (0 on success, 1 on any error, which is printed to stderr first).
(async () => {
  try {
    await spy();
    process.exit(0);
  } catch (err) {
    console.error(err);
    process.exit(1);
  }
})();
/**
 * Logs in to etfdb.com in a headless browser, pages through the screener API
 * (structure = ETF, dividend frequency = Monthly) from inside the page, and
 * keeps the rows whose `overall_rating` is 'A+', 'A', 'A-' or 'B+'.
 *
 * Uses the module-level `puppeteer`, `promisify` and `credentials`.
 * Prints the filtered rows as pretty JSON followed by a one-line summary.
 *
 * @returns {Promise<Array<Object>>} The filtered screener rows.
 * @throws Propagates any error from launching the browser, navigating,
 *   logging in or evaluating the in-page script. The browser is closed
 *   before the error leaves this function.
 */
async function spy() {
  const browser = await puppeteer.launch({
    headless: true
  });

  let etfData;
  let fetchErrors;
  try {
    const page = await browser.newPage();
    await page.goto(
      'https://etfdb.com/members/login/',
      { waitUntil: 'networkidle2', timeout: 0 }
    );

    // Fill in the login form by focusing each field and typing into it.
    await page.click('#user_login');
    await page.keyboard.type(credentials.login);
    await page.click('#password');
    await page.keyboard.type(credentials.password);
    await page.click('#login-button');
    // Fixed pause to let the login complete before issuing API requests.
    await promisify(setTimeout)(2000);

    // Everything inside this callback runs in the page, not in Node, so it
    // must be self-contained and may only return serializable values.
    const scraped = await page.evaluate(async () => {
      const sleep = (timeout = 500) => new Promise((resolve) => setTimeout(resolve, timeout));
      const rows = [];
      const errors = [];
      const fetchBody = {
        structure: ['ETF'],
        dividend_frequency: ['Monthly'],
        only: ['meta', 'data', 'count']
      };
      // Access-Control-Allow-* are response headers and have no meaning on a
      // request, so only Content-Type is sent.
      const fetchOptions = {
        method: 'post',
        headers: {
          'Content-Type': 'application/json'
        }
      };

      // The number of pages is unknown until the first response arrives;
      // start with one page and widen the loop bound from `meta.total_pages`.
      let totalPages = 1;
      for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
        try {
          fetchBody.page = pageNumber;
          const res = await fetch(
            'https://etfdb.com/api/screener/',
            { ...fetchOptions, body: JSON.stringify(fetchBody) }
          );
          if (!res.ok) {
            throw new Error(`HTTP ${res.status}`);
          }
          const json = await res.json();
          if (json) {
            if (Array.isArray(json.data)) {
              rows.push(...json.data);
            }
            if (json.meta && 'total_pages' in json.meta && Number(json.meta.total_pages) > 0) {
              totalPages = Number(json.meta.total_pages);
            }
          }
          await sleep(); // just in case there is a limit on the number of requests
        } catch (err) {
          // Best effort: skip the failed page but report it back to Node,
          // because the page's own console is not visible to the caller.
          errors.push(`screener page ${pageNumber}: ${err && err.message ? err.message : String(err)}`);
        }
      }
      return { rows, errors };
    });
    etfData = scraped.rows;
    fetchErrors = scraped.errors;
  } finally {
    // Always release the browser process, even when login or scraping fails.
    await browser.close();
  }

  for (const message of fetchErrors) {
    console.error(message);
  }

  const filteringOverallRating = ['A+', 'A', 'A-', 'B+'];
  const etfABData = etfData.filter(({ overall_rating }) => filteringOverallRating.includes(overall_rating));
  console.log(JSON.stringify(etfABData, null, 2));
  console.log(`total items = ${etfData.length}, filtered items = ${etfABData.length}`);
  return etfABData;
}
Console output:
[
{
"symbol": {
"type": "link",
"text": "BSAE",
"url": "/etf/BSAE/"
},
"name": {
"type": "link",
"text": "Invesco BulletShares 2021 USD Emerging Markets Debt ETF",
"url": "/etf/BSAE/"
},
"mobile_title": "BSAE - Invesco BulletShares 2021 USD Emerging Markets Debt ETF",
"price": "$25.37",
"assets": "$10.12",
"average_volume": "2,508",
"ytd": "2.32%",
"overall_rating": "A+",
"asset_class": "Bond"
},
{
"symbol": {
"type": "link",
"text": "QDIV",
"url": "/etf/QDIV/"
},
"name": {
"type": "link",
"text": "Global X S&P 500 Quality Dividend ETF",
"url": "/etf/QDIV/"
},
"mobile_title": "QDIV - Global X S&P 500 Quality Dividend ETF",
"price": "$23.40",
"assets": "$5.71",
"average_volume": "809",
"ytd": "-11.17%",
"overall_rating": "B+",
"asset_class": "Equity"
},
...
...
...
]
total items = 415, filtered items = 163