initial commit

This commit is contained in:
2025-08-20 09:40:45 -04:00
commit 10a8c54b70
18 changed files with 641 additions and 0 deletions

17
src/dialect.ts Normal file
View File

@@ -0,0 +1,17 @@
import type { CsvDialectOptions, ICsvDialect } from "./types";
/**
 * Immutable description of a CSV dialect: which string separates fields,
 * which string quotes them, and whether field values are whitespace-trimmed.
 *
 * Any option that is omitted (or nullish) falls back to its default:
 * delimiter `,`, quote `"`, and trimming enabled.
 */
export class CsvDialect implements ICsvDialect {
  public readonly delimiter: string;
  public readonly quote: string;
  public readonly trimWhitespace: boolean;

  /** Shared instance built with every option left at its default. */
  public static readonly Default: CsvDialect = new CsvDialect();

  /**
   * @param options Optional overrides; only the supplied keys replace defaults.
   */
  public constructor(options?: Partial<CsvDialectOptions>) {
    const overrides: Partial<CsvDialectOptions> = options ?? {};
    const { delimiter, quote, trimWhitespace } = overrides;

    // `??` (not `||`) so that an explicit empty string or `false` is respected.
    this.trimWhitespace = trimWhitespace ?? true;
    this.quote = quote ?? '"';
    this.delimiter = delimiter ?? ",";
  }
}

18
src/index.ts Normal file
View File

@@ -0,0 +1,18 @@
/**
 * Public entry point of the package: re-exports the shared types, the dialect
 * class, the parser, and the built-in chunk sources.
 *
 * `CsvDialectOptions` is part of the public surface because it appears in the
 * signatures of `ParseOptions.dialect` and the `CsvDialect` constructor, so
 * consumers need to be able to name it.
 */
export type {
  CsvDialectOptions,
  CsvParseResult,
  CsvParseResultArray,
  CsvParseResultObject,
  ICsvDialect,
  IChunkSource,
  OutputMode,
  ParseOptions,
  RowArray,
  RowObject,
} from "./types";
export { CsvDialect } from "./dialect";
export { CsvParser } from "./parser/CsvParser";
export { StringChunkSource } from "./sources/StringSource";
export { FileChunkSource } from "./sources/FileSource";

View File

@@ -0,0 +1,97 @@
import type { ICsvDialect, RowArray } from "../types";
/**
 * Incremental CSV tokenizer.
 *
 * Text is fed in arbitrary chunks via {@link pushChunk}; completed rows are
 * collected internally and handed out by {@link takeRows}. Call {@link drain}
 * once at end of input to flush a final record that has no trailing newline.
 *
 * Supported syntax:
 * - fields separated by the dialect's `delimiter` (compared one UTF-16 code
 *   unit at a time, so it must be a single-character string to ever match);
 * - fields wrapped in the dialect's `quote`, with a doubled quote inside a
 *   quoted field standing for one literal quote character;
 * - records terminated by `\n`, `\r`, or `\r\n`.
 *
 * All state is carried between chunks, so a chunk boundary may fall anywhere,
 * including between the two quotes of an escaped quote or between `\r` and
 * `\n`.
 *
 * NOTE: when `trimWhitespace` is enabled the whole field value is trimmed,
 * including whitespace that was inside quotes.
 */
export class CsvRowTokenizer {
  private readonly dialect: ICsvDialect;
  /** Completed rows not yet handed out by `takeRows`. */
  private readonly rows: RowArray[] = [];
  /** Fields of the record currently being assembled. */
  private currentRow: RowArray = [];
  /** Text of the field currently being assembled. */
  private currentField: string = "";
  /** True while inside a quoted section. */
  private inQuotes: boolean = false;
  /**
   * True when the previous character was a quote seen inside a quoted
   * section: it is either the first half of an escaped quote or the closing
   * quote, which is only known once the next character (possibly in the next
   * chunk) arrives.
   */
  private pendingQuote: boolean = false;
  /** True when the current field contained an opening quote (e.g. `""`). */
  private fieldQuoted: boolean = false;
  /** True when the previous character was a `\r` that ended a record. */
  private lastWasCR: boolean = false;

  /**
   * @param dialect Delimiter, quote and trimming settings to tokenize with.
   */
  public constructor(dialect: ICsvDialect) {
    this.dialect = dialect;
  }

  /**
   * Consumes one chunk of input, appending any rows it completes to the
   * internal queue.
   *
   * @param chunk Next piece of CSV text; may be empty.
   */
  public pushChunk(chunk: string): void {
    const { delimiter, quote } = this.dialect;
    for (let index = 0; index < chunk.length; index += 1) {
      const char = chunk.charAt(index);

      // Resolve a quote seen on the previous character. Deferring the decision
      // (instead of peeking at chunk[index + 1]) keeps escaped quotes intact
      // when the pair is split across two chunks.
      if (this.pendingQuote) {
        this.pendingQuote = false;
        if (char === quote) {
          this.currentField += quote;
          continue;
        }
        // It was a closing quote; handle `char` as ordinary unquoted input.
        this.inQuotes = false;
      }

      if (this.inQuotes) {
        if (char === quote) {
          this.pendingQuote = true;
        } else {
          this.currentField += char;
        }
        continue;
      }

      // A `\n` directly after a `\r` is the second half of CRLF; the record
      // was already ended by the `\r`.
      if (char === "\n" && this.lastWasCR) {
        this.lastWasCR = false;
        continue;
      }
      // Any other character (including a delimiter or an opening quote)
      // breaks the CR/LF pairing, so a later `\n` is a real terminator.
      this.lastWasCR = false;

      if (char === quote) {
        this.inQuotes = true;
        this.fieldQuoted = true;
        continue;
      }
      if (char === delimiter) {
        this.flushField();
        continue;
      }
      if (char === "\n") {
        this.flushField();
        this.flushRow();
        continue;
      }
      if (char === "\r") {
        this.flushField();
        this.flushRow();
        this.lastWasCR = true;
        continue;
      }
      this.currentField += char;
    }
  }

  /**
   * Removes and returns every row completed so far.
   *
   * @returns The completed rows in input order; empty if none are ready.
   */
  public takeRows(): RowArray[] {
    return this.rows.splice(0, this.rows.length);
  }

  /**
   * Signals end of input: closes an unterminated quoted field, flushes the
   * final record if one is in progress, and returns all remaining rows.
   *
   * No row is emitted when nothing is pending, so input that ends with a
   * record terminator (or is empty) does not produce a trailing `[""]` row.
   *
   * @returns All rows not previously returned by `takeRows`.
   */
  public drain(): RowArray[] {
    // End of input acts as the closing quote for an open quoted field and
    // settles any undecided trailing quote as a closing quote.
    this.pendingQuote = false;
    this.inQuotes = false;
    this.lastWasCR = false;

    // The check must happen before flushing: flushField always pushes a
    // value, which would otherwise make every drain emit a row.
    const hasPendingRecord =
      this.currentRow.length > 0 || this.currentField.length > 0 || this.fieldQuoted;
    if (hasPendingRecord) {
      this.flushField();
      this.flushRow();
    }
    return this.takeRows();
  }

  /** Finishes the current field (trimming if configured) and starts a new one. */
  private flushField(): void {
    const value = this.dialect.trimWhitespace ? this.currentField.trim() : this.currentField;
    this.currentRow.push(value);
    this.currentField = "";
    this.fieldQuoted = false;
  }

  /** Moves the current record to the completed queue and starts a new one. */
  private flushRow(): void {
    this.rows.push(this.currentRow);
    this.currentRow = [];
  }
}

38
src/sources/FileSource.ts Normal file
View File

@@ -0,0 +1,38 @@
import type { IChunkSource } from "../types";
/**
 * Chunk source that reads a file from disk and yields its contents as UTF-8
 * decoded strings.
 *
 * Uses `Bun.file(...).stream()` when a global `Bun` is present and falls back
 * to `node:fs` `createReadStream` otherwise. `chunkSize` is only applied on
 * the Node.js path (as the stream's `highWaterMark`); on the Bun path the
 * chunk sizes are whatever the stream delivers.
 */
export class FileChunkSource implements IChunkSource {
  private readonly path: string;
  private readonly chunkSize: number;

  /**
   * @param path      Path of the file to read.
   * @param chunkSize Requested read size in bytes; values below 1024 are
   *                  raised to 1024. Defaults to 64 KiB.
   */
  public constructor(path: string, chunkSize: number = 64 * 1024) {
    this.path = path;
    this.chunkSize = Math.max(1024, chunkSize);
  }

  /**
   * Streams the file as decoded text chunks.
   *
   * @returns An async iterable of non-overlapping string chunks that
   *          concatenate to the decoded file contents.
   */
  public async *chunks(): AsyncIterable<string> {
    // Prefer Bun if available; fall back to Node streams to keep portability
    if (typeof Bun !== "undefined" && typeof Bun.file === "function") {
      const reader = Bun.file(this.path).stream().getReader();
      // A streaming decoder buffers an incomplete multi-byte sequence at the
      // end of one chunk and completes it with the next; decoding each chunk
      // on its own would emit U+FFFD for characters split across chunks.
      // `ignoreBOM: true` keeps a leading BOM in the output, matching the
      // Node.js path below.
      const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
      let exhausted = false;
      try {
        while (true) {
          const { value, done } = await reader.read();
          if (done) {
            exhausted = true;
            break;
          }
          if (value) {
            const text = decoder.decode(value, { stream: true });
            if (text.length > 0) yield text;
          }
        }
        // Flush bytes still buffered by the decoder (truncated trailing sequence).
        const tail = decoder.decode();
        if (tail.length > 0) yield tail;
      } finally {
        if (!exhausted) {
          // The consumer stopped early or a read failed: cancel so the
          // underlying file is released. A rejection here only repeats the
          // error that is already propagating, so it is deliberately ignored.
          await reader.cancel().catch(() => undefined);
        }
        reader.releaseLock();
      }
      return;
    }
    // Node.js fallback: with an `encoding` set, the stream itself handles
    // multi-byte sequences that span chunk boundaries.
    const { createReadStream } = await import("node:fs");
    const stream = createReadStream(this.path, { encoding: "utf8", highWaterMark: this.chunkSize });
    for await (const chunk of stream) {
      yield chunk as string;
    }
  }
}

View File

@@ -0,0 +1,19 @@
import type { IChunkSource } from "../types";
/**
 * Chunk source backed by an in-memory string, handing it out in consecutive
 * fixed-size slices.
 */
export class StringChunkSource implements IChunkSource {
  private readonly content: string;
  private readonly chunkSize: number;

  /**
   * @param content   Full text to serve.
   * @param chunkSize Slice length in UTF-16 code units; values below 1024 are
   *                  raised to 1024. Defaults to 64 * 1024.
   */
  public constructor(content: string, chunkSize: number = 64 * 1024) {
    this.chunkSize = Math.max(1024, chunkSize);
    this.content = content;
  }

  /**
   * Yields the content slice by slice, in order.
   *
   * @returns An async iterable whose chunks concatenate to the original
   *          content; yields nothing for an empty string.
   */
  public async *chunks(): AsyncIterable<string> {
    const text = this.content;
    const step = this.chunkSize;
    let start = 0;
    while (start < text.length) {
      const end = start + step;
      yield text.slice(start, end);
      start = end;
    }
  }
}

42
src/types.ts Normal file
View File

@@ -0,0 +1,42 @@
/**
 * Settings that describe how CSV text is delimited and quoted.
 * Accepted (partially) by the `CsvDialect` constructor and by
 * `ParseOptions.dialect`.
 */
export interface CsvDialectOptions {
/** String that separates fields within a row (`CsvDialect` defaults it to `,`). */
delimiter: string;
/** String that opens and closes a quoted field (`CsvDialect` defaults it to `"`). */
quote: string;
/** When true, each field value is whitespace-trimmed (`CsvDialect` defaults it to true). */
trimWhitespace: boolean;
}
/**
 * Read-only view of a fully resolved dialect: the same three settings as
 * `CsvDialectOptions`, all required and immutable. Implemented by
 * `CsvDialect` and consumed by `CsvRowTokenizer`.
 */
export interface ICsvDialect {
/** String that separates fields within a row. */
readonly delimiter: string;
/** String that opens and closes a quoted field. */
readonly quote: string;
/** Whether each field value is whitespace-trimmed when it is completed. */
readonly trimWhitespace: boolean;
}
/** One parsed row as an ordered list of field values. */
export type RowArray = string[];
/**
 * One parsed row as a string-to-string map.
 * NOTE(review): keys are presumably the header names — confirm against the parser.
 */
export type RowObject = Record<string, string>;
/**
 * Selects the shape of returned rows.
 * NOTE(review): presumably "array" yields `RowArray` rows and "object" yields
 * `RowObject` rows — confirm against the parser.
 */
export type OutputMode = "array" | "object";
/**
 * Options accepted when parsing. Every field is optional; the defaults and
 * exact semantics are decided by the parser, which is not defined in this
 * file.
 */
export interface ParseOptions {
/** Dialect overrides; any subset of `CsvDialectOptions`. */
dialect?: Partial<CsvDialectOptions>;
/** NOTE(review): presumably marks the first row as a header row — confirm in the parser. */
hasHeader?: boolean;
/** Requested row shape; see `OutputMode`. */
output?: OutputMode;
/** NOTE(review): presumably enables a check that rows have a consistent field count — confirm in the parser. */
validateRowLength?: boolean;
/** NOTE(review): presumably drops rows that came from blank lines — confirm in the parser. */
skipEmptyLines?: boolean;
}
/** Parse result whose rows are arrays of field values; headers may be absent. */
export interface CsvParseResultArray {
/** Header names, when available. */
headers?: string[];
/** Parsed rows, each an ordered list of field values. */
rows: RowArray[];
}
/** Parse result whose rows are string-to-string maps; headers are always present. */
export interface CsvParseResultObject {
/** Header names. */
headers: string[];
/** Parsed rows, each a string-to-string map. */
rows: RowObject[];
}
/** Either result shape. The union has no discriminant tag, so callers must know which shape they requested. */
export type CsvParseResult = CsvParseResultArray | CsvParseResultObject;
/**
 * A source of CSV text delivered incrementally. Implemented by
 * `StringChunkSource` and `FileChunkSource`.
 */
export interface IChunkSource {
/** Returns an async iterable of consecutive text chunks. */
chunks(): AsyncIterable<string>;
}