[{"op": "core/text-transform","description": "Text transform on cells in column Column 3 using expression grel:value.replace(\"-- \",\"\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "Column 3","expression": "grel:value.replace(\"-- \",\"\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/text-transform","description": "Text transform on cells in column Column 4 using expression grel:value.replace(\"DOI:\",\"\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "Column 4","expression": "grel:value.replace(\"DOI:\",\"\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/column-addition-by-fetching-urls","description": "Create column FetchShowPage at index 4 by fetching URLs based on column Column 4 using expression grel:value","engineConfig": {"mode": "row-based","facets": []},"newColumnName": "FetchShowPage","columnInsertIndex": 4,"baseColumnName": "Column 4","urlExpression": "grel:value","onError": "set-to-blank","delay": 100,"cacheResponses": false,"httpHeadersJson": null},{"op": "core/column-addition","description": "Create column keywords at index 5 based on column FetchShowPage using expression grel:value.parseHtml().select(\"div#articleSubject\")[0].toString()","engineConfig": {"mode": "row-based","facets": []},"newColumnName": "keywords","columnInsertIndex": 5,"baseColumnName": "FetchShowPage","expression": "grel:value.parseHtml().select(\"div#articleSubject\")[0].toString()","onError": "set-to-blank"},{"op": "core/text-transform","description": "Text transform on cells in column keywords using expression grel:replace(value,/<\\/?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)\\/?>/,'')","engineConfig": {"mode": "row-based","facets": []},"columnName": "keywords","expression": "grel:replace(value,/<\\/?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)\\/?>/,'')","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": 
"core/text-transform","description": "Text transform on cells in column keywords using expression grel:value.replace(\"Keywords\",\"\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "keywords","expression": "grel:value.replace(\"Keywords\",\"\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/text-transform","description": "Text transform on cells in column keywords using expression grel:value.replace(\";\",\",\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "keywords","expression": "grel:value.replace(\";\",\",\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/column-addition","description": "Create column abstract at index 5 based on column FetchShowPage using expression grel:value.parseHtml().select(\"div#articleAbstract\")[0].toString()","engineConfig": {"mode": "row-based","facets": []},"newColumnName": "abstract","columnInsertIndex": 5,"baseColumnName": "FetchShowPage","expression": "grel:value.parseHtml().select(\"div#articleAbstract\")[0].toString()","onError": "set-to-blank"},{"op": "core/text-transform","description": "Text transform on cells in column abstract using expression grel:replace(value,/<\\/?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)\\/?>/,'')","engineConfig": {"mode": "row-based","facets": []},"columnName": "abstract","expression": "grel:replace(value,/<\\/?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)\\/?>/,'')","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/text-transform","description": "Text transform on cells in column abstract using expression grel:value.replace(\"Abstract\",\"\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "abstract","expression": "grel:value.replace(\"Abstract\",\"\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/text-transform",
"description": "Text transform on cells in column abstract using expression grel:value.replace(\"&nbsp;\",\"\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "abstract","expression": "grel:value.replace(\"&nbsp;\",\"\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/column-addition","description": "Create column PDFlink at index 5 based on column FetchShowPage using expression grel:value.parseHtml().select(\"div#articleFullText\")[0].select(\"a[href]\")[0].toString()","engineConfig": {"mode": "row-based","facets": []},"newColumnName": "PDFlink","columnInsertIndex": 5,"baseColumnName": "FetchShowPage","expression": "grel:value.parseHtml().select(\"div#articleFullText\")[0].select(\"a[href]\")[0].toString()","onError": "set-to-blank"},{"op": "core/text-transform","description": "Text transform on cells in column PDFlink using expression grel:value.replace(\"<a href=\",\"\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "PDFlink","expression": "grel:value.replace(\"<a href=\",\"\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/text-transform","description": "Text transform on cells in column PDFlink using expression grel:value.replace('\"',\"\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "PDFlink","expression": "grel:value.replace('\"',\"\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/text-transform","description": "Text transform on cells in column PDFlink using expression grel:value.replace('class=file target=_parent>PDF/A</a>',\"\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "PDFlink","expression": "grel:value.replace('class=file target=_parent>PDF/A</a>',\"\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/text-transform","description": "Text transform on cells in column PDFlink using expression grel:value.replace(\"view\",\"viewFile\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "PDFlink","expression": 
"grel:value.replace(\"view\",\"viewFile\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/column-addition","description": "Create column vol issue no at index 5 based on column FetchShowPage using expression grel:value.parseHtml().select(\"div#breadcrumb\")[0].select(\"a[href]\")[1].toString()","engineConfig": {"mode": "row-based","facets": []},"newColumnName": "vol issue no","columnInsertIndex": 5,"baseColumnName": "FetchShowPage","expression": "grel:value.parseHtml().select(\"div#breadcrumb\")[0].select(\"a[href]\")[1].toString()","onError": "set-to-blank"},{"op": "core/text-transform","description": "Text transform on cells in column vol issue no using expression grel:replace(value,/<\\/?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)\\/?>/,'')","engineConfig": {"mode": "row-based","facets": []},"columnName": "vol issue no","expression": "grel:replace(value,/<\\/?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)\\/?>/,'')","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/column-split","description": "Split column Column 3 by separator","engineConfig": {"mode": "row-based","facets": []},"columnName": "Column 3","guessCellType": true,"removeOriginalColumn": true,"mode": "separator","separator": "Pp.","regex": false,"maxColumns": 0},{"op": "core/column-rename","description": "Rename column Column 3 2 to page nos","oldColumnName": "Column 3 2","newColumnName": "page nos"},{"op": "core/column-addition","description": "Create column attribution at index 3 based on column Column 3 1 using expression grel:value + \"Journal of Threatened Taxa, \" + cells[\"vol issue no\"].value+\"; pp. \" +cells[\"page nos\"].value+ cells[\"Column 4\"].value","engineConfig": {"mode": "row-based","facets": []},"newColumnName": "attribution","columnInsertIndex": 3,"baseColumnName": "Column 3 1",
"expression": "grel:value + \"Journal of Threatened Taxa, \" + cells[\"vol issue no\"].value+\"; pp. \" +cells[\"page nos\"].value+ cells[\"Column 4\"].value","onError": "set-to-blank"},{"op": "core/text-transform","description": "Text transform on cells in column abstract using expression grel:value.replace(\"&amp;\",\"&\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "abstract","expression": "grel:value.replace(\"&amp;\",\"&\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/text-transform","description": "Text transform on cells in column keywords using expression grel:value.replace(\"&amp;\",\"&\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "keywords","expression": "grel:value.replace(\"&amp;\",\"&\")","onError": "keep-original","repeat": false,"repeatCount": 10},{"op": "core/text-transform","description": "Text transform on cells in column keywords using expression grel:value.replace(\"&amp,\",\"&\")","engineConfig": {"mode": "row-based","facets": []},"columnName": "keywords","expression": "grel:value.replace(\"&amp,\",\"&\")","onError": "keep-original","repeat": false,"repeatCount": 10}]
> <mailto:openrefine+unsub...@googlegroups.com>.
--
You received this message because you are subscribed to the Google Groups "OpenRefine" group.
To unsubscribe from this group and stop receiving emails from it, send an email to openrefine+...@googlegroups.com.
Can you share an example of the URLs you are requesting?
Owen Stephens, Owen Stephens Consulting
value.parseHtml().select("a")[0].htmlAttr("href").replace("http","https")
application/unixref+xml
<html><head><title>Handle Redirect</title></head><body><a href="http://threatenedtaxa.org/index.php/JoTT/article/view/3948">http://threatenedtaxa.org/index.php/JoTT/article/view/3948</a></body></html>