test-jsonl-parserStream.mjs 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. 'use strict';
  2. import test from 'tape-six';
  3. import fs from 'fs';
  4. import path from 'path';
  5. import zlib from 'zlib';
  6. import {Writable} from 'stream';
  7. import {readString} from './helpers.mjs';
  8. import parserStream from '../src/jsonl/parserStream.js';
  /**
   * Round-trips `len` generated objects through the JSONL parser and checks
   * that the parsed values equal the originals.
   *
   * The objects are serialized with `JSON.stringify()`, joined with '\n', fed to
   * `readString(input, quant)`, piped through `parserStream()`, and every emitted
   * `chunk.value` is collected and compared with `t.deepEqual()` when the sink
   * finishes.
   *
   * @param {object} t - tester object; its `deepEqual()` and `fail()` are used.
   * @param {Function} resolve - called after the comparison (or a parser error)
   *   has been reported.
   * @param {number} [len=0] - number of objects to generate; 0 sends an empty input.
   * @param {number} [quant] - forwarded to `readString()` unchanged
   *   (presumably the size of the emitted pieces; confirm in ./helpers.mjs).
   */
  const roundtrip = (t, resolve, len = 0, quant) => {
    const objects = Array.from({length: len}, (_, n) => ({
      stringWithTabsAndNewlines: "Did it work?\nNo...\t\tI don't think so...",
      anArray: [n + 1, n + 2, true, 'tabs?\t\t\t\u0001\u0002\u0003', false],
      n
    }));
    const input = objects.map(object => JSON.stringify(object)).join('\n');
    const result = [];

    const parser = parserStream();
    // pipe() does not forward errors, and an 'error' event without a listener is
    // thrown as an uncaught exception: report it as a test failure instead.
    parser.on('error', error => {
      t.fail(`Unexpected parser error: ${error}`);
      resolve();
    });

    const sink = new Writable({
      objectMode: true,
      write(chunk, _, callback) {
        result.push(chunk.value);
        callback(null);
      },
      final(callback) {
        t.deepEqual(objects, result);
        resolve();
        callback(null);
      }
    });

    readString(input, quant).pipe(parser).pipe(sink);
  };
  // Smoke test: no length is passed, so the round trip runs on an empty input.
  test.asPromise('jsonl parserStream: smoke test', (t, resolve) => roundtrip(t, resolve));

  // Round trips over sets of 1 to 12 objects.
  for (let size = 1; size <= 12; size += 1) {
    const name = `jsonl parserStream: roundtrip with a set of objects - ${size}`;
    test.asPromise(name, (t, resolve) => {
      roundtrip(t, resolve, size);
    });
  }

  // Round trips over 10 objects, passing 1 to 12 as the `quant` argument.
  for (let quant = 1; quant <= 12; quant += 1) {
    const name = `jsonl parserStream: roundtrip with different window sizes - ${quant}`;
    test.asPromise(name, (t, resolve) => {
      roundtrip(t, resolve, 10, quant);
    });
  }
  // Reads the gzipped sample next to this module, parses it as JSONL and checks
  // that every chunk's `key` equals the number of chunks seen before it and that
  // exactly 100 chunks arrive.
  test.asPromise('jsonl parserStream: read file', (t, resolve) => {
    if (!/^file:\/\//.test(import.meta.url)) throw new Error('Cannot get the current working directory');

    // Resolve the sample relative to this module as a file: URL and let fs do
    // the URL-to-path conversion: cutting the first 7 characters off
    // import.meta.url keeps percent-encoding (e.g. %20) and leaves a leading
    // slash before a Windows drive letter.
    const fileUrl = new URL('./data/sample.jsonl.gz', import.meta.url);

    let count = 0;

    // pipe() does not forward errors, and an 'error' event without a listener is
    // thrown as an uncaught exception: report it as a test failure instead.
    const reportError = error => {
      t.fail(`Unexpected stream error: ${error}`);
      resolve();
    };

    const source = fs.createReadStream(fileUrl);
    const gunzip = zlib.createGunzip();
    const parser = parserStream();
    for (const stream of [source, gunzip, parser]) {
      stream.on('error', reportError);
    }

    const sink = new Writable({
      objectMode: true,
      write(chunk, _, callback) {
        t.equal(count, chunk.key);
        ++count;
        callback(null);
      },
      final(callback) {
        t.equal(count, 100);
        resolve();
        callback(null);
      }
    });

    source.pipe(gunzip).pipe(parser).pipe(sink);
  });
  // Text that is not JSON must end in an 'error' event: receiving any 'data' or
  // reaching 'end' is recorded as a failure.
  test.asPromise('jsonl parserStream: bad json', (t, resolve) => {
    const source = readString(' not json ');
    const parser = parserStream();
    source.pipe(parser);

    const unexpected = () => t.fail("We shouldn't be here.");

    parser.on('data', () => unexpected());
    parser.on('error', error => {
      t.ok(error);
      resolve();
    });
    parser.on('end', () => {
      unexpected();
      resolve();
    });
  });