initial commit
This commit is contained in:
17
src/dialect.ts
Normal file
17
src/dialect.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
import type { CsvDialectOptions, ICsvDialect } from "./types";
|
||||
|
||||
/**
 * Immutable description of how a CSV document is delimited and quoted.
 *
 * Every option that is left unset (`undefined` or `null`) falls back to its
 * default: `,` as the delimiter, `"` as the quote character, and whitespace
 * trimming switched on.
 */
export class CsvDialect implements ICsvDialect {
  /** Shared instance built without overrides, i.e. all options at their defaults. */
  public static readonly Default: CsvDialect = new CsvDialect();

  /** Character that separates fields within a row. */
  public readonly delimiter: string;

  /** Character that opens and closes a quoted field. */
  public readonly quote: string;

  /** Whether surrounding whitespace is stripped from each field. */
  public readonly trimWhitespace: boolean;

  /**
   * @param options Overrides for individual dialect settings; omitted keys
   *   keep their defaults.
   */
  public constructor(options?: Partial<CsvDialectOptions>) {
    const overrides = options ?? {};

    // `??` (not `||`) so that falsy-but-set values such as "" or false survive.
    this.delimiter = overrides.delimiter ?? ",";
    this.quote = overrides.quote ?? '"';
    this.trimWhitespace = overrides.trimWhitespace ?? true;
  }
}
|
||||
|
||||
|
||||
18
src/index.ts
Normal file
18
src/index.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
// Public type surface of the package.
// `CsvDialectOptions` is included because it is the parameter type of the
// exported `CsvDialect` constructor and of `ParseOptions.dialect`; without it
// consumers cannot name the options object they pass in.
export type {
  CsvDialectOptions,
  CsvParseResult,
  CsvParseResultArray,
  CsvParseResultObject,
  ICsvDialect,
  IChunkSource,
  OutputMode,
  ParseOptions,
  RowArray,
  RowObject,
} from "./types";

// Runtime exports.
export { CsvDialect } from "./dialect";

export { CsvParser } from "./parser/CsvParser";

export { StringChunkSource } from "./sources/StringSource";
export { FileChunkSource } from "./sources/FileSource";
|
||||
97
src/parser/CsvRowTokenizer.ts
Normal file
97
src/parser/CsvRowTokenizer.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
import type { ICsvDialect, RowArray } from "../types";
|
||||
|
||||
/**
 * Incremental CSV tokenizer.
 *
 * Feed text with {@link CsvRowTokenizer.pushChunk}, collect finished rows with
 * {@link CsvRowTokenizer.takeRows}, and call {@link CsvRowTokenizer.drain}
 * once after the last chunk. All parsing state lives on the instance, so a
 * field, a quoted section, an escaped quote (`""`) or a CRLF pair may be split
 * across chunk boundaries at any position.
 *
 * The dialect's `delimiter` and `quote` are compared against individual
 * characters, so only single-character values can ever match.
 *
 * Note: when `trimWhitespace` is enabled the whole accumulated field is
 * trimmed, including whitespace that appeared inside quotes.
 */
export class CsvRowTokenizer {
  private readonly dialect: ICsvDialect;

  /** Completed rows that have not been handed out yet. */
  private readonly rows: RowArray[] = [];

  /** Fields of the row currently being assembled. */
  private currentRow: RowArray = [];

  /** Raw (untrimmed) text of the field currently being assembled. */
  private currentField: string = "";

  /** True once a quote opened the current field, even if it is still empty. */
  private fieldStarted: boolean = false;

  /** True while inside a quoted section. */
  private inQuotes: boolean = false;

  /**
   * True when the previous character was a quote seen inside a quoted
   * section. The next character decides whether it was the first half of an
   * escaped quote (`""`) or the closing quote.
   */
  private quotePending: boolean = false;

  /** True when the previous unquoted character was `\r`. */
  private lastWasCR: boolean = false;

  /**
   * @param dialect Delimiter, quote character and trimming policy to apply.
   */
  public constructor(dialect: ICsvDialect) {
    this.dialect = dialect;
  }

  /**
   * Consumes one chunk of input and queues every row it completes.
   *
   * @param chunk Next piece of the CSV text; may start or end anywhere.
   */
  public pushChunk(chunk: string): void {
    const { delimiter, quote } = this.dialect;

    for (let index = 0; index < chunk.length; index += 1) {
      const char = chunk[index]!;

      if (this.inQuotes) {
        if (!this.quotePending) {
          if (char === quote) {
            // Defer the decision: this is either an escape or the closer.
            // Keeping it as state (instead of peeking at chunk[index + 1])
            // makes `""` work when the two quotes land in different chunks.
            this.quotePending = true;
          } else {
            this.currentField += char;
          }
          continue;
        }

        this.quotePending = false;
        if (char === quote) {
          // Doubled quote inside quotes is a literal quote character.
          this.currentField += quote;
          continue;
        }

        // The pending quote was the closing quote; handle `char` as a
        // regular unquoted character below.
        this.inQuotes = false;
      }

      // Only a "\n" that *immediately* follows "\r" belongs to a CRLF pair.
      // Clearing the flag for every character (not just plain text) keeps a
      // quote or delimiter after "\r" from hiding a later real line break.
      const followsCR = this.lastWasCR;
      this.lastWasCR = false;

      if (char === quote) {
        this.inQuotes = true;
        this.fieldStarted = true;
        continue;
      }

      if (char === delimiter) {
        this.flushField();
        continue;
      }

      if (char === "\n") {
        if (!followsCR) {
          this.flushField();
          this.flushRow();
        }
        continue;
      }

      if (char === "\r") {
        this.flushField();
        this.flushRow();
        this.lastWasCR = true;
        continue;
      }

      this.currentField += char;
    }
  }

  /**
   * Removes and returns all rows completed so far.
   *
   * @returns The queued rows in input order; empty when none are ready.
   */
  public takeRows(): RowArray[] {
    return this.rows.splice(0, this.rows.length);
  }

  /**
   * Signals end of input: finishes the row in progress (if any) and returns
   * every remaining row.
   *
   * An unterminated quoted field is closed implicitly. No row is emitted when
   * nothing is pending, so input that ends with a line break (or is empty)
   * does not produce a trailing `[""]` row.
   *
   * @returns All rows not yet taken, in input order.
   */
  public drain(): RowArray[] {
    // End of input closes any open quoted section / undecided quote.
    this.inQuotes = false;
    this.quotePending = false;
    this.lastWasCR = false;

    // Decide *before* flushing the field: flushing always appends to the row,
    // which would make an "is anything pending" check afterwards always true.
    const hasPendingRow =
      this.currentRow.length > 0 || this.currentField.length > 0 || this.fieldStarted;
    if (hasPendingRow) {
      this.flushField();
      this.flushRow();
    }

    return this.takeRows();
  }

  /** Appends the current field (trimmed if the dialect asks for it) to the row. */
  private flushField(): void {
    const value = this.dialect.trimWhitespace ? this.currentField.trim() : this.currentField;
    this.currentRow.push(value);
    this.currentField = "";
    this.fieldStarted = false;
  }

  /** Queues the current row and starts a new one. */
  private flushRow(): void {
    this.rows.push(this.currentRow);
    this.currentRow = [];
  }
}
|
||||
|
||||
|
||||
38
src/sources/FileSource.ts
Normal file
38
src/sources/FileSource.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import type { IChunkSource } from "../types";
|
||||
|
||||
/**
 * Chunk source that reads a file from disk as UTF-8 text.
 *
 * Uses `Bun.file(...).stream()` when a `Bun` global with a `file` function is
 * present, and otherwise falls back to a Node.js read stream.
 */
export class FileChunkSource implements IChunkSource {
  private readonly path: string;
  private readonly chunkSize: number;

  /**
   * @param path Path of the file to read.
   * @param chunkSize Requested chunk size; values below 1024 are raised to
   *   1024. It is passed as `highWaterMark` to the Node.js stream; the Bun
   *   path does not use it.
   */
  public constructor(path: string, chunkSize: number = 64 * 1024) {
    this.path = path;
    this.chunkSize = Math.max(1024, chunkSize);
  }

  /**
   * Yields the file content as a sequence of decoded text chunks.
   *
   * @returns An async iterable of strings whose concatenation is the file's
   *   UTF-8 decoded content.
   */
  public async *chunks(): AsyncIterable<string> {
    // Prefer Bun if available; fall back to Node streams to keep portability
    if (typeof Bun !== "undefined" && typeof Bun.file === "function") {
      const reader = Bun.file(this.path).stream().getReader();

      // Byte chunks can end in the middle of a multi-byte UTF-8 sequence.
      // A streaming decoder buffers the incomplete tail until the next chunk
      // instead of emitting replacement characters. `ignoreBOM: true` keeps a
      // leading BOM in the output, matching the Node.js path below.
      const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
      let finished = false;

      try {
        while (true) {
          const { value, done } = await reader.read();
          if (done) break;
          if (!value) continue;

          const text = decoder.decode(value, { stream: true });
          if (text.length > 0) yield text;
        }
        finished = true;

        // Flush whatever the decoder is still holding (truncated sequence).
        const tail = decoder.decode();
        if (tail.length > 0) yield tail;
      } finally {
        if (!finished) {
          // Consumer stopped early or reading failed: release the underlying
          // file. Best effort only, so a cancel failure cannot mask the
          // error that got us here.
          await reader.cancel().catch(() => undefined);
        }
        reader.releaseLock();
      }
      return;
    }

    // Node.js fallback: with an encoding set, the stream decodes UTF-8 across
    // chunk boundaries itself.
    const { createReadStream } = await import("node:fs");
    const stream = createReadStream(this.path, {
      encoding: "utf8",
      highWaterMark: this.chunkSize,
    });
    for await (const chunk of stream) {
      yield chunk as string;
    }
  }
}
|
||||
|
||||
|
||||
19
src/sources/StringSource.ts
Normal file
19
src/sources/StringSource.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
import type { IChunkSource } from "../types";
|
||||
|
||||
/**
 * Chunk source backed by an in-memory string, handed out in fixed-size
 * slices.
 */
export class StringChunkSource implements IChunkSource {
  private readonly content: string;
  private readonly chunkSize: number;

  /**
   * @param content Full text to serve.
   * @param chunkSize Slice length in UTF-16 code units; values below 1024 are
   *   raised to 1024.
   */
  public constructor(content: string, chunkSize: number = 64 * 1024) {
    this.chunkSize = Math.max(1024, chunkSize);
    this.content = content;
  }

  /**
   * Yields consecutive slices of the content; yields nothing for an empty
   * string.
   *
   * @returns An async iterable whose items concatenate back to the content.
   */
  public async *chunks(): AsyncIterable<string> {
    const text = this.content;
    const step = this.chunkSize;

    let start = 0;
    while (start < text.length) {
      const end = start + step;
      yield text.slice(start, end);
      start = end;
    }
  }
}
|
||||
|
||||
|
||||
42
src/types.ts
Normal file
42
src/types.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
 * Settings that define a CSV dialect. `CsvDialect` accepts a partial version
 * of this object and fills in `,`, `"` and `true` for missing keys.
 */
export interface CsvDialectOptions {
  /** Field separator. */
  delimiter: string;

  /** Character that opens and closes a quoted field. */
  quote: string;

  /** Whether each field is trimmed of surrounding whitespace. */
  trimWhitespace: boolean;
}
|
||||
|
||||
/**
 * Read-only view of a fully resolved dialect, as consumed by the tokenizer.
 */
export interface ICsvDialect {
  /**
   * Field separator. The tokenizer compares it with one input character at a
   * time, so only a single-character value can match.
   */
  readonly delimiter: string;

  /**
   * Quote character; compared one input character at a time like
   * `delimiter`.
   */
  readonly quote: string;

  /** When true, the tokenizer calls `trim()` on every field it emits. */
  readonly trimWhitespace: boolean;
}
|
||||
|
||||
/** One parsed row as a list of field values in column order. */
export type RowArray = string[];

/** One parsed row as a string-keyed map of field values. */
export type RowObject = Record<string, string>;

/** Selects which row shape is produced: `RowArray` or `RowObject`. */
export type OutputMode = "array" | "object";
|
||||
|
||||
/**
 * Optional settings for a parse run. Every key may be omitted.
 *
 * NOTE(review): the parser that consumes these options is not part of this
 * file, so defaults and exact semantics below are inferred from the names;
 * confirm against the parser implementation.
 */
export interface ParseOptions {
  /** Dialect overrides (any subset of `CsvDialectOptions`). */
  dialect?: Partial<CsvDialectOptions>;

  /** Presumably: treat the first row as column headers. */
  hasHeader?: boolean;

  /** Requested row shape, `"array"` or `"object"`. */
  output?: OutputMode;

  /** Presumably: check that rows have a consistent number of fields. */
  validateRowLength?: boolean;

  /** Presumably: drop rows that come from empty lines. */
  skipEmptyLines?: boolean;
}
|
||||
|
||||
/** Parse result whose rows are arrays of field values; headers are optional. */
export interface CsvParseResultArray {
  headers?: string[];
  rows: RowArray[];
}

/** Parse result whose rows are string-keyed maps; headers are always present. */
export interface CsvParseResultObject {
  headers: string[];
  rows: RowObject[];
}

/** Either result shape. */
export type CsvParseResult = CsvParseResultArray | CsvParseResultObject;
|
||||
|
||||
/** Anything that can deliver its text content as an async sequence of string chunks. */
export interface IChunkSource {
  /** @returns An async iterable over the content's chunks. */
  chunks(): AsyncIterable<string>;
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user