Browse Source

Fetches and saves.

Serially, to reduce the probability
of being rate limited.

Slow.
Herby Vojčík 4 years ago
parent
commit
115ef097cd
1 changed file with 26 additions and 0 deletions
  1. 26 0
      index.js

+ 26 - 0
index.js

@@ -0,0 +1,26 @@
+// node --require regenerator-runtime/runtime --require ./babel-local --require isomorphic-fetch index.js
+
+import {createWriteStream} from 'fs';
+
/**
 * Fetch the list of "Vestník verejného obstarávania" datasets from the
 * data.gov.sk CKAN API and save each dataset's resource files to disk
 * as `<year>.<issue>.xml`.
 *
 * Resources are fetched serially (one `await` per file) on purpose, to
 * reduce the probability of being rate limited by the server.
 *
 * @returns {Promise<Array>} the raw dataset records returned by the API
 * @throws {Error} on a non-OK HTTP response from the listing or a file fetch
 */
async function work () {
    const rootUri = 'https://data.gov.sk/api/3/action/package_search?fq=%2btitle:Vestn%C3%ADk%20verejn%C3%A9ho%20obstar%C3%A1vania*&rows=9999&sort=metadata_created_date%20asc';
    const datasetsRaw = await fetch(rootUri);
    if (!datasetsRaw.ok) {
        throw new Error(`Dataset listing failed: HTTP ${datasetsRaw.status}`);
    }
    const {result: {count, results}} = await datasetsRaw.json();
    console.info(`Datasets: ${count}`);
    for (const {title, num_resources, resources} of results) {
        console.info(`Fetching: ${title}, ${num_resources} files`);
        for (const {name, description, url} of resources) {
            // Resource names look like "<issue>/<year>"; skip (with a
            // warning) anything that does not match instead of crashing
            // on a null match result.
            const match = name.match(/(\d+)\/(\d+)/);
            if (!match) {
                console.warn(`Skipping resource with unrecognised name: ${name}`);
                continue;
            }
            const [, issue, year] = match;
            console.info(`File ${year} / ${issue}: ${description}`);
            const xmlRaw = await fetch(url);
            if (!xmlRaw.ok) {
                throw new Error(`Fetch of ${url} failed: HTTP ${xmlRaw.status}`);
            }
            // node-fetch (via isomorphic-fetch — see the header comment)
            // exposes the body as a Node readable stream; pipe it straight
            // to disk and wait for the write to finish or fail.
            await new Promise((resolve, reject) =>
                xmlRaw.body.pipe(createWriteStream(`${year}.${issue}.xml`))
                    .on('close', resolve)
                    .on('error', reject)
            );
        }
    }
    return results;
}
+
// Kick off the crawl; surface any failure instead of leaving the
// rejection unhandled.
work().catch((error) => {
    console.error(error);
});