-
Notifications
You must be signed in to change notification settings - Fork 2
/
scraper.js
90 lines (89 loc) · 2.41 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
// Requiring puppeteer
const puppeteer = require("puppeteer");
const cheerio = require("cheerio");
// Launch a headless browser, search the ARBK site for the hard-coded number,
// open the first result link and print the scraped key/value rows.
puppeteer
  .launch({ headless: true })
  .then(async browser => {
    try {
      // Creating a new tab
      const page = await browser.newPage();
      // Going to ARBK page
      await page.goto("https://arbk.rks-gov.net/");
      await page.type("#txtNumriBiznesit", "70301460");

      // Start waiting for the navigation *before* clicking so the load event
      // cannot be missed, and await both so a failure rejects this chain
      // (and reaches the .catch below) instead of becoming an unhandled
      // rejection inside an event callback.
      await Promise.all([page.waitForNavigation(), page.click("#Submit1")]);

      // Follow the link in the second column of the results table.
      await Promise.all([
        page.waitForNavigation(),
        page.click(
          "#content > article > div > table > tbody > tr > td:nth-child(2) > a"
        )
      ]);

      const html = await page.content();
      const $ = cheerio.load(html);
      const raw = {
        info: [],
        authorized: []
      };
      raw.info.push({
        key: "emri",
        value: $("#MainContent_ctl00_lblBiznesi").text()
      });
      store_raw_data(
        $,
        "#MainContent_ctl00_pnlBizneset > table:nth-child(1) > tbody > tr ",
        raw,
        "info"
      );
      store_raw_data(
        $,
        "#MainContent_ctl00_pnlBizneset > table:nth-child(2) > tbody > tr",
        raw,
        "authorized"
      );
      console.log(raw);
    } finally {
      // Always release the browser, otherwise the Node process never exits.
      await browser.close();
    }
  })
  .catch(error => {
    console.log(error);
  });
// "#MainContent_ctl00_pnlBizneset > table:nth-child(1) > tbody > tr "
/**
 * Reads key/value pairs out of the elements matched by `htmlSelector` and
 * appends them, as `{ key, value }` objects, to the array at `obj[key]`.
 *
 * For every matched element the key text is taken from the first child cell
 * (`td:nth-child(1)`) and the value text from the `<span>` in the second child
 * cell (`td:nth-child(2)`). The key text lives in a `<b>` for the "info"
 * section and in a `<span>` for the "authorized" section. Both texts are
 * trimmed and every run of two or more whitespace characters is collapsed
 * into a single space.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @param {string} htmlSelector - Selector matching the rows to read.
 * @param {Object} obj - Accumulator object; `obj[key]` must already be an
 *   array and is mutated in place.
 * @param {string} key - Section name: "info" or "authorized". Any other value
 *   leaves `obj` untouched.
 * @returns {void}
 */
function store_raw_data($, htmlSelector, obj, key) {
  // Tag that wraps the key text inside the first cell, per section.
  let keyTag;
  if (key === "info") {
    keyTag = "b";
  } else if (key === "authorized") {
    keyTag = "span";
  } else {
    return;
  }

  const rows = $(htmlSelector);
  // html() yields null when the selector matched nothing.
  if (rows.html() === null || rows.html() === undefined) {
    return;
  }

  // Trim and collapse runs of 2+ whitespace characters into one space.
  const tidy = text => text.trim().replace(/\s\s+/g, " ");

  // Text of the `tag` child inside the `position`-th cell of a row.
  const cellText = (row, position, tag) =>
    tidy(
      $(row)
        .children(`td:nth-child(${position})`)
        .children(tag)
        .text()
    );

  rows.each((i, elem) => {
    obj[key].push({
      key: cellText(elem, 1, keyTag),
      value: cellText(elem, 2, "span")
    });
  });
}