test-jsonl-parser.mjs 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. 'use strict';
  2. import test from 'tape-six';
  3. import fs from 'fs';
  4. import path from 'path';
  5. import zlib from 'zlib';
  6. import {Writable} from 'stream';
  7. import {readString} from './helpers.mjs';
  8. import chain from '../src/index.js';
  9. import parser from '../src/jsonl/parser.js';
  /**
   * Builds `len` sample objects, serializes them one JSON document per line,
   * feeds that text through `readString()` -> `parser()` -> a collecting sink,
   * and asserts that the collected values equal the generated objects.
   *
   * @param {object} t - tester object; only `t.deepEqual()` is used here.
   * @param {Function} resolve - invoked (with no arguments) from the sink's
   *   `final()` hook, right after the assertion.
   * @param {number} [len=0] - number of objects to generate; 0 produces an
   *   empty input string.
   * @param {*} [quant] - forwarded untouched as the second argument of
   *   `readString()` (presumably a chunk size -- confirm in helpers.mjs).
   */
  const roundtrip = (t, resolve, len = 0, quant) => {
    // Strings deliberately contain tabs, newlines and control characters so
    // that escaping inside a line-delimited format is exercised.
    const objects = Array.from({length: len}, (_, n) => ({
      stringWithTabsAndNewlines: "Did it work?\nNo...\t\tI don't think so...",
      anArray: [n + 1, n + 2, true, 'tabs?\t\t\t\u0001\u0002\u0003', false],
      n
    }));

    // One JSON document per line, no trailing newline.
    const input = objects.map(object => JSON.stringify(object)).join('\n');
    const result = [];

    chain([
      readString(input, quant),
      parser(),
      new Writable({
        objectMode: true,
        write(chunk, _, callback) {
          // Only the `value` property of each item is collected.
          result.push(chunk.value);
          callback(null);
        },
        final(callback) {
          // Actual (parsed) data first, expected (generated) data second.
          t.deepEqual(result, objects);
          resolve();
          callback(null);
        }
      })
    ]);
  };
  // Smoke test: roundtrip is invoked with `len` and `quant` left undefined.
  test.asPromise('jsonl parser: smoke test', (t, resolve) => roundtrip(t, resolve));

  // One roundtrip test per object count, from 1 through 12, registered in
  // ascending order.
  for (let size = 1; size <= 12; ++size) {
    test.asPromise(`jsonl parser: roundtrip with ${size} sets of objects`, (t, resolve) => {
      roundtrip(t, resolve, size);
    });
  }
  // Repeats the 10-object roundtrip once for every `quant` value from 1 to 12;
  // the value is handed to roundtrip() as its fourth argument.
  Array.from({length: 12}, (_, index) => index + 1).forEach(windowSize => {
    test.asPromise(`jsonl parser: roundtrip with different window sizes - ${windowSize}`, (t, resolve) => {
      roundtrip(t, resolve, 10, windowSize);
    });
  });
  // Streams the gzipped fixture `./data/sample.jsonl.gz` (located relative to
  // this module) through gunzip and the parser, checking that each item's
  // `key` matches its zero-based position and that exactly 100 items arrive.
  test.asPromise('jsonl parser: read file', (t, resolve) => {
    if (!/^file:\/\//.test(import.meta.url)) throw new Error('Cannot get the current working directory');

    // Resolve the fixture as a file: URL instead of slicing the "file://"
    // prefix off by hand: the URL keeps percent-escapes (spaces, non-ASCII
    // characters) and Windows drive letters intact, and fs accepts it directly.
    const fileUrl = new URL('./data/sample.jsonl.gz', import.meta.url);
    let count = 0;

    chain([
      fs.createReadStream(fileUrl),
      zlib.createGunzip(),
      parser(),
      new Writable({
        objectMode: true,
        write(chunk, _, callback) {
          // Actual key first, expected running index second.
          t.equal(chunk.key, count);
          ++count;
          callback(null);
        },
        final(callback) {
          t.equal(count, 100);
          resolve();
          callback(null);
        }
      })
    ]);
  });
  // Feeds text that is not JSON into the parser. The test passes only when an
  // 'error' event fires; a 'data' or 'end' event is recorded as a failure.
  test.asPromise('jsonl parser: bad json', (t, resolve) => {
    const stream = chain([readString(' not json '), parser()]);

    const onData = () => t.fail("We shouldn't be here.");
    const onError = error => {
      t.ok(error);
      resolve();
    };
    const onEnd = () => {
      // Finishing without an error is a failure, but the test is still
      // settled so that it does not hang.
      t.fail("We shouldn't be here.");
      resolve();
    };

    stream.on('data', onData);
    stream.on('error', onError);
    stream.on('end', onEnd);
  });