Kaynağa Gözat

Added tests for JSONL parser and stringer.

Eugene Lazutkin 3 yıl önce
ebeveyn
işleme
43401d0613

BIN
tests/data/sample.jsonl.gz


+ 126 - 0
tests/test-jsonl-parser.mjs

@@ -0,0 +1,126 @@
+'use strict';
+
+import test from 'tape-six';
+
+import fs from 'fs';
+import path from 'path';
+import zlib from 'zlib';
+import {Writable} from 'stream';
+
+import {readString} from './helpers.mjs';
+
+import parser from '../src/jsonl/parser.js';
+
+const roundtrip = (t, resolve, len, quant) => {
+  const objects = [];
+  for (let n = 0; n < len; n += 1) {
+    objects.push({
+      stringWithTabsAndNewlines: "Did it work?\nNo...\t\tI don't think so...",
+      anArray: [n + 1, n + 2, true, 'tabs?\t\t\t\u0001\u0002\u0003', false],
+      n
+    });
+  }
+
+  const json = [];
+  for (let n = 0; n < objects.length; n += 1) {
+    json.push(JSON.stringify(objects[n]));
+  }
+
+  const input = json.join('\n'),
+    result = [];
+  readString(input, quant)
+    .pipe(parser())
+    .pipe(
+      new Writable({
+        objectMode: true,
+        write(chunk, _, callback) {
+          result.push(chunk);
+          callback(null);
+        },
+        final(callback) {
+          t.deepEqual(objects, result);
+          resolve();
+          callback(null);
+        }
+      })
+    );
+};
+
+test.asPromise('jsonl parser: smoke test', (t, resolve) => roundtrip(t, resolve));
+
+test.asPromise('jsonl parser: roundtrip with 1 set of objects', (t, resolve) => {
+  roundtrip(t, resolve, 1)
+});
+
+test.asPromise('jsonl parser: roundtrip with 2 sets of objects', (t, resolve) => {
+  roundtrip(t, resolve, 2);
+});
+
+test.asPromise('jsonl parser: roundtrip with 3 sets of objects', (t, resolve) => {
+  roundtrip(t, resolve, 3);
+});
+
+test.asPromise('jsonl parser: roundtrip with 4 sets of objects', (t, resolve) => {
+  roundtrip(t, resolve, 4);
+});
+
+test.asPromise('jsonl parser: roundtrip with 5 sets of objects', (t, resolve) => {
+  roundtrip(t, resolve, 5);
+});
+
+test.asPromise('jsonl parser: roundtrip with 6 sets of objects', (t, resolve) => {
+  roundtrip(t, resolve, 6);
+});
+
+test.asPromise('jsonl parser: roundtrip with 7 sets of objects', (t, resolve) => {
+  roundtrip(t, resolve, 7);
+});
+
+test.asPromise('jsonl parser: roundtrip with 8 sets of objects', (t, resolve) => {
+  roundtrip(t, resolve, 8);
+});
+
+test.asPromise('jsonl parser: roundtrip with 9 sets of objects', (t, resolve) => {
+  roundtrip(t, resolve, 9);
+});
+
+test.asPromise('jsonl parser: roundtrip with 10 sets of objects', (t, resolve) => {
+  roundtrip(t, resolve, 10);
+});
+
+test.asPromise('jsonl parser: roundtrip with 11 sets of objects', (t, resolve) => {
+  roundtrip(t, resolve, 11);
+});
+
+test.asPromise('jsonl parser: roundtrip with 12 sets of objects', (t, resolve) => {
+  roundtrip(t, resolve, 12);
+});
+
+test.asPromise('jsonl parser: roundtrip with different window sizes', (t, resolve) => {
+  for (let i = 1; i <=12; ++i) {
+    roundtrip(t, resolve, 10, i);
+  }
+});
+
+// Streams the gzipped JSONL fixture located in ./data next to this module
+// through gunzip and the parser, and expects exactly 100 parsed items.
+test.asPromise('jsonl parser: read file', (t, resolve) => {
+  if (!/^file:\/\//.test(import.meta.url)) throw Error('Cannot get the current working directory');
+  // Resolve the fixture as a file: URL and hand the URL object to fs directly.
+  // Cutting the "file://" prefix off the string is not a valid conversion:
+  // it leaves a leading slash before a Windows drive letter and keeps
+  // percent-encoded characters (e.g. "%20") undecoded.
+  const fileUrl = new URL('./data/sample.jsonl.gz', import.meta.url);
+  let count = 0;
+  fs.createReadStream(fileUrl)
+    .pipe(zlib.createGunzip())
+    .pipe(parser())
+    .pipe(
+      new Writable({
+        objectMode: true,
+        write(_1, _2, callback) {
+          ++count;
+          callback(null);
+        },
+        final(callback) {
+          t.equal(count, 100);
+          resolve();
+          callback(null);
+        }
+      })
+    );
+});

+ 102 - 0
tests/test-jsonl-stringer.mjs

@@ -0,0 +1,102 @@
+'use strict';
+
+import test from 'tape-six';
+
+import {Writable, Transform} from 'stream';
+
+import {readString} from './helpers.mjs';
+
+import parser from '../src/jsonl/parser.js';
+import stringer from '../src/jsonl/stringer.js';
+
+test.asPromise('jsonl stringer: smoke test', (t, resolve) => {
+  const pattern = {
+      a: [[[]]],
+      b: {a: 1},
+      c: {a: 1, b: 2},
+      d: [true, 1, "'x\"y'", null, false, true, {}, [], ''],
+      e: 1,
+      f: '',
+      g: true,
+      h: false,
+      i: null,
+      j: [],
+      k: {}
+    },
+    string = JSON.stringify(pattern);
+
+  let buffer = '';
+  readString(string)
+    .pipe(parser())
+    .pipe(
+      new Transform({
+        writableObjectMode: true,
+        readableObjectMode: true,
+        transform(chunk, _, callback) {
+          this.push(chunk);
+          callback(null);
+        }
+      })
+    )
+    .pipe(stringer())
+    .pipe(
+      new Writable({
+        write(chunk, _, callback) {
+          buffer += chunk;
+          callback(null);
+        },
+        final(callback) {
+          t.deepEqual(string, buffer);
+          resolve();
+          callback(null);
+        }
+      })
+    );
+});
+
+test.asPromise('jsonl stringer: multiple', (t, resolve) => {
+  const pattern = {
+    a: [[[]]],
+    b: {a: 1},
+    c: {a: 1, b: 2},
+    d: [true, 1, "'x\"y'", null, false, true, {}, [], ''],
+    e: 1,
+    f: '',
+    g: true,
+    h: false,
+    i: null,
+    j: [],
+    k: {}
+  };
+
+  let string = JSON.stringify(pattern),
+    buffer = '';
+  string = string + '\n' + string + '\n' + string;
+
+  readString(string + '\n')
+    .pipe(parser())
+    .pipe(
+      new Transform({
+        writableObjectMode: true,
+        readableObjectMode: true,
+        transform(chunk, _, callback) {
+          this.push(chunk);
+          callback(null);
+        }
+      })
+    )
+    .pipe(stringer())
+    .pipe(
+      new Writable({
+        write(chunk, _, callback) {
+          buffer += chunk;
+          callback(null);
+        },
+        final(callback) {
+          t.deepEqual(string, buffer);
+          resolve();
+          callback(null);
+        }
+      })
+    );
+});