test-jsonl-parser.mjs 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. 'use strict';
  2. import test from 'tape-six';
  3. import fs from 'fs';
  4. import path from 'path';
  5. import zlib from 'zlib';
  6. import {Writable} from 'stream';
  7. import {readString} from './helpers.mjs';
  8. import parser from '../src/jsonl/parser.js';
  /**
   * Round-trips `len` generated objects through the JSONL parser and checks the result.
   *
   * The objects are serialized with `JSON.stringify()`, joined with '\n', and fed
   * through `readString(input, quant)` into `parser()`. The `value` property of every
   * item coming out of the parser is collected; when the sink finishes, the collected
   * values are compared with the generated objects and `resolve` is invoked.
   *
   * @param {object} t - tester object; only `t.deepEqual()` is used here.
   * @param {Function} resolve - called without arguments after the comparison ran.
   * @param {number} [len] - how many objects to generate; when omitted none are generated.
   * @param {*} [quant] - forwarded untouched to `readString()` as its second argument.
   */
  const roundtrip = (t, resolve, len, quant) => {
    const objects = [];
    for (let index = 0; index < len; ++index) {
      const item = {
        stringWithTabsAndNewlines: "Did it work?\nNo...\t\tI don't think so...",
        anArray: [index + 1, index + 2, true, 'tabs?\t\t\t\u0001\u0002\u0003', false],
        n: index
      };
      objects.push(item);
    }

    // One JSON document per line.
    const input = objects.map(item => JSON.stringify(item)).join('\n');
    const result = [];

    const sinkOptions = {
      objectMode: true,
      write(chunk, _, callback) {
        result.push(chunk.value);
        callback(null);
      },
      final(callback) {
        // The comparison happens only after the whole input went through the parser.
        t.deepEqual(objects, result);
        resolve();
        callback(null);
      }
    };

    readString(input, quant).pipe(parser()).pipe(new Writable(sinkOptions));
  };
  // Smoke test: roundtrip() is called without a length, so no objects are generated
  // and the parser is fed an empty string.
  test.asPromise('jsonl parser: smoke test', (t, resolve) => {
    roundtrip(t, resolve);
  });

  // One test per object count from 1 to 12, registered in ascending order.
  // Only the count of 1 uses the singular "set" in the test name.
  for (let count = 1; count <= 12; ++count) {
    const noun = count === 1 ? 'set' : 'sets';
    test.asPromise(`jsonl parser: roundtrip with ${count} ${noun} of objects`, (t, resolve) => {
      roundtrip(t, resolve, count);
    });
  }
  // Runs the 10-object roundtrip once for every `quant` from 1 to 12 (`quant` is
  // handed to readString() by roundtrip(); presumably the chunk size — confirm in helpers).
  //
  // Every run gets its own completion callback and the test is resolved only after
  // all of them have finished. Sharing the test's single `resolve` between the runs
  // would end the test as soon as the first pipeline completes, leaving the remaining
  // assertions to fire after the test is already over.
  test.asPromise('jsonl parser: roundtrip with different window sizes', (t, resolve, reject) => {
    const runs = [];
    for (let quant = 1; quant <= 12; ++quant) {
      runs.push(new Promise(done => roundtrip(t, done, 10, quant)));
    }
    // A synchronous throw inside roundtrip() rejects its promise; forward it to the test.
    Promise.all(runs).then(() => resolve(), reject);
  });
  // Reads the gzipped JSONL fixture next to this module, gunzips it, parses it and
  // checks that every item's `key` equals a running 0-based counter and that exactly
  // 100 items were produced.
  test.asPromise('jsonl parser: read file', (t, resolve) => {
    if (!/^file:\/\//.test(import.meta.url)) throw new Error('Cannot get the current working directory');

    // Resolve the fixture as a URL relative to this module and hand the URL to fs.
    // Cutting the "file://" prefix off by hand keeps percent-encoding (e.g. "%20")
    // in the path and produces "/C:/..." on Windows; fs decodes file: URLs correctly.
    const fileUrl = new URL('./data/sample.jsonl.gz', import.meta.url);

    let count = 0;
    const sink = new Writable({
      objectMode: true,
      write(chunk, _, callback) {
        t.equal(count, chunk.key);
        ++count;
        callback(null);
      },
      final(callback) {
        t.equal(count, 100);
        resolve();
        callback(null);
      }
    });

    fs.createReadStream(fileUrl).pipe(zlib.createGunzip()).pipe(parser()).pipe(sink);
  });
  105. });