The script below allows you to automatically find and download information from one of the freelance exchanges.
The domain name is taken from the Google Spread Sheets page. And search results are also uploaded there.
The function scraper() , which contains two cycles, is used as a control script. In the first loop ( for-loop ), we access the web pages of the site and save them in the variable html . In the second loop ( while-loop ), this variable is sequentially processed using three auxiliary functions:
function scraper() {
const ss = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('Sheet1');
const urlDomain = ss.getRange(1, 1).getValue();
let url = urlDomain;
let count = 0;
for (let page = 1; page < 5; page++) {
url = urlDomain + page + '/';
if (page == 1) url = urlDomain;
let response = UrlFetchApp.fetch(url);
ss.getRange(2, 1).setValue(response.getResponseCode());
let html = response.getContentText();
let p = 0;
while (true) {
let out = getBlock(html, 'div', html.indexOf('class="JobSearchCard-primary"', p));
let block = out[0];
p = out[1] + 1;
if (p == 0) break;
let title1 = getBlock(block, 'div', 0)[0];
let title = getBlock(title1, 'a', 0)[0];
let link = getOpenTag(title1, 'a', 0);
link = getAttrName(link, 'href', 0)
let formula = '=HYPERLINK("https://www.freelancer.com' +link + '", "' + title + '")';
ss.getRange(3 + 3 * count, 2).setValue(formula);
let price = getBlock(block, 'div', block.indexOf('class="JobSearchCard-primary-price'))[0];
if (price.includes('span')) price = deleteBlock(price, 'span', price.indexOf('span'));
ss.getRange(3 + 3 * count + 1, 2).setValue(price).setHorizontalAlignment('right');
let description = getBlock(block, 'p', block.indexOf('class="JobSearchCard-primary-description"'))[0];
ss.getRange(3 + 3 * count, 1, 3).mergeVertically().setValue(description)
.setBorder(true, true, true, true, null, null, '#000000', SpreadsheetApp.BorderStyle.SOLID)
.setVerticalAlignment('middle')
.setWrapStrategy(SpreadsheetApp.WrapStrategy.WRAP);
ss.getRange(3 + 3 * count, 2, 3).setBorder(true, true, true, true, null, null, '#000000', SpreadsheetApp.BorderStyle.SOLID);
let cat = getBlock(block, 'div', block.indexOf('class="JobSearchCard-primary-tags"'))[0];
cat = cat.split('</a>').map(item => item.split('>')[1]);
cat.pop();
cat = cat.join(', ');
ss.getRange(3 + 3 * count + 2, 2).setValue(cat);
count++;
};
};
}
function getAttrName(html, attr, i) {
let idxStart = html.indexOf(attr , i);
if (idxStart == -1) return "Can't to find attr " + attr + ' !';
idxStart = html.indexOf('"' , idxStart) + 1;
let idxEnd = html.indexOf('"' , idxStart);
return html.slice(idxStart,idxEnd).trim();
}
function getOpenTag(html, tag, idxStart) {
let openTag = '<' + tag;
let lenOpenTag = openTag.length;
// where we are?
if (html.slice(idxStart, idxStart + lenOpenTag) != openTag) {
idxStart = html.lastIndexOf(openTag, idxStart);
if (idxStart == -1) return "Can't to find openTag " + openTag + ' !';
};
// begin loop after openTag
let idxEnd = html.indexOf('>', idxStart) + 1;
if (idxStart == -1) return "Can't to find closing bracket '>' for openTag!";
return html.slice(idxStart,idxEnd).trim();
}
function deleteBlock(html, tag, idxStart) { // delete opening & closing tag and info between them
let openTag = '<' + tag;
let lenOpenTag = openTag.length;
let closeTag = '</' + tag + '>';
let lenCloseTag = closeTag.length;
let countCloseTags = 0;
let iMax = html.length;
let idxEnd = 0;
// where we are?
if (html.slice(idxStart, idxStart + lenOpenTag) != openTag) {
idxStart = html.lastIndexOf(openTag, idxStart);
if (idxStart == -1) return ["Can't to find openTag " + openTag + ' !', -1];
};
// begin loop after openTag
let i = html.indexOf('>') + 1;
while (i <= iMax) {
i++;
if (i === iMax) {
return ['Could not find closing tag for ' + tag, -1];
};
let carrentValue = html[i];
if (html[i] === '<'){
let closingTag = html.slice(i, i + lenCloseTag);
let openingTag = html.slice(i, i + lenOpenTag);
if (html.slice(i, i + lenCloseTag) === closeTag) {
if (countCloseTags === 0) {
idxEnd = i + lenCloseTag;
break;
} else {
countCloseTags -= 1;
};
} else if (html.slice(i, i + lenOpenTag) === openTag) {
countCloseTags += 1;
};
};
};
return (html.slice(0, idxStart) + html.slice(idxEnd, iMax)).trim();
}
function getBlock(html, tag, idxStart) { // <tag .... > Block </tag>
let openTag = '<' + tag;
let lenOpenTag = openTag.length;
let closeTag = '</' + tag + '>';
let lenCloseTag = closeTag.length;
let countCloseTags = 0;
let iMax = html.length;
let idxEnd = 0;
// where we are?
if (html.slice(idxStart, idxStart + lenOpenTag) != openTag) {
idxStart = html.lastIndexOf(openTag, idxStart);
if (idxStart == -1) return ["Can't to find openTag " + openTag + ' !', -1];
};
// change start - will start after openTag!
idxStart = html.indexOf('>', idxStart) + 1;
let i = idxStart;
while (i <= iMax) {
i++;
if (i === iMax) {
return ['Could not find closing tag for ' + tag, -1];
};
let carrentValue = html[i];
if (html[i] === '<'){
let closingTag = html.slice(i, i + lenCloseTag);
let openingTag = html.slice(i, i + lenOpenTag);
if (html.slice(i, i + lenCloseTag) === closeTag) {
if (countCloseTags === 0) {
idxEnd = i - 1;
break;
} else {
countCloseTags -= 1;
};
} else if (html.slice(i, i + lenOpenTag) === openTag) {
countCloseTags += 1;
};
};
};
return [html.slice(idxStart,idxEnd + 1).trim(), idxEnd];
}
You can find more information in this video (RU voice):