// test-jsonl-parserStream.mjs
  'use strict';

  import fs from 'fs';
  import path from 'path';
  import {Writable} from 'stream';
  import {fileURLToPath} from 'url';
  import zlib from 'zlib';

  import test from 'tape-six';

  import {readString} from './helpers.mjs';
  import parserStream from '../src/jsonl/parserStream.js';
  /**
   * Serializes `len` generated objects as newline-separated JSON, streams that
   * text through `parserStream()`, and asserts that the parsed values equal the
   * original objects.
   *
   * @param {object} t - test context; `deepEqual()` and `fail()` are called on it.
   * @param {Function} resolve - called once the comparison has run, or as soon as
   *   the parser reports an error.
   * @param {number} [len=0] - how many objects to generate; with the default the
   *   parser is fed an empty string.
   * @param {number} [quant] - forwarded untouched to `readString()`; presumably
   *   the chunk ("window") size used to slice the input — confirm in helpers.mjs.
   */
  const roundtrip = (t, resolve, len = 0, quant) => {
    const objects = Array.from({length: len}, (_, n) => ({
      stringWithTabsAndNewlines: "Did it work?\nNo...\t\tI don't think so...",
      anArray: [n + 1, n + 2, true, 'tabs?\t\t\t\u0001\u0002\u0003', false],
      n
    }));
    const input = objects.map(object => JSON.stringify(object)).join('\n');
    const result = [];

    // A stream 'error' event with no listener is thrown as an uncaught exception,
    // which would abort the whole run instead of failing just this test.
    const parser = parserStream();
    parser.on('error', error => {
      t.fail(`Unexpected parser error: ${error}`);
      resolve();
    });

    readString(input, quant)
      .pipe(parser)
      .pipe(
        new Writable({
          objectMode: true,
          write(chunk, _, callback) {
            result.push(chunk.value);
            callback(null);
          },
          final(callback) {
            t.deepEqual(objects, result);
            resolve();
            callback(null);
          }
        })
      );
  };
  // Smoke test: no length is passed, so roundtrip() generates no objects and the
  // parser is fed an empty string.
  test.asPromise('jsonl parserStream: smoke test', (t, resolve) => {
    roundtrip(t, resolve);
  });
  // Register one round-trip test per list length from 1 to 12; the fourth
  // roundtrip() argument (quant) is left undefined.
  for (let size = 1; size <= 12; size += 1) {
    const title = `jsonl parserStream: roundtrip with a set of objects - ${size}`;
    test.asPromise(title, (t, resolve) => roundtrip(t, resolve, size));
  }
  // Register one round-trip test of ten objects per quant value from 1 to 12.
  Array.from({length: 12}, (_, index) => index + 1).forEach(quant => {
    test.asPromise(
      `jsonl parserStream: roundtrip with different window sizes - ${quant}`,
      (t, resolve) => {
        roundtrip(t, resolve, 10, quant);
      }
    );
  });
  /**
   * Streams the gzipped fixture `./data/sample.jsonl.gz` (resolved relative to
   * this module) through gunzip and `parserStream()`, checking that every item's
   * `key` equals its zero-based position and that exactly 100 items arrive.
   */
  test.asPromise('jsonl parserStream: read file', (t, resolve) => {
    if (!/^file:\/\//.test(import.meta.url)) {
      throw new Error('Cannot get the current working directory');
    }

    // fileURLToPath() decodes percent-escapes (e.g. "%20") and handles Windows
    // drive letters, which slicing a fixed prefix off the URL string does not.
    const fileName = fileURLToPath(new URL('./data/sample.jsonl.gz', import.meta.url));

    // A stream 'error' event with no listener is thrown as an uncaught exception;
    // report it as a failure of this test instead.
    const failOnError = error => {
      t.fail(`Unexpected stream error: ${error}`);
      resolve();
    };

    let count = 0;
    fs.createReadStream(fileName)
      .on('error', failOnError)
      .pipe(zlib.createGunzip())
      .on('error', failOnError)
      .pipe(parserStream())
      .on('error', failOnError)
      .pipe(
        new Writable({
          objectMode: true,
          write(chunk, _, callback) {
            t.equal(count, chunk.key);
            ++count;
            callback(null);
          },
          final(callback) {
            t.equal(count, 100);
            resolve();
            callback(null);
          }
        })
      );
  });
  // Feeds text that is not valid JSON and expects the parser to emit 'error';
  // receiving 'data' or reaching 'end' counts as a failure.
  test.asPromise('jsonl parserStream: bad json', (t, resolve) => {
    const source = readString(' not json ');
    const parser = source.pipe(parserStream());
    const unexpected = () => t.fail("We shouldn't be here.");

    parser.on('data', unexpected);
    parser.on('error', error => {
      t.ok(error);
      resolve();
    });
    parser.on('end', () => {
      unexpected();
      resolve();
    });
  });