I'm trying to extract data from a list of >1000 URLs using a script that uses UrlFetchApp.fetch(url)
and regex based on this article.
This is the code I'm using.
function importRegex(url, regex_string) { var html, content = ''; var response = UrlFetchApp.fetch(url); if (response) { html = response.getContentText(); if (html.length && regex_string.length) { var regex = new RegExp( regex_string, "i" ); content = html.match(regex)[1]; } } content = unescapeHTML(content); Utilities.sleep(1000); // avoid call limit by adding a delay return content; }var htmlEntities = { cent: '¢', pound: '£', yen: '¥', euro: '€', copy: '©', reg: '®', lt: '<', gt: '>', mdash: '–', quot: '"', amp: '&', apos: '\''};function unescapeHTML(str) { return str.replace(/\&([^;]+);/g, function (entity, entityCode) { var match; if (entityCode in htmlEntities) { return htmlEntities[entityCode]; } else if (match = entityCode.match(/^#x([\da-fA-F]+)$/)) { return String.fromCharCode(parseInt(match[1], 16)); } else if (match = entityCode.match(/^#(\d+)$/)) { return String.fromCharCode(~~match[1]); } else { return entity; } });};
and the importregex function formula I'm using is
=importRegex(A4, "<h1 class=""ch-title"".*?>(.*)<\/h1>")
It gives the following error
TypeError: Cannot read property '1' of null (line 9).
I'm not sure how to fix it.