// Provenance: this module was introduced as src/parser/CsvParser.ts by commit
// 79ddec7d4df3c01491d633546b19aa2670778c55 ("clean up for deployment",
// Alexander Zinn, Wed, 20 Aug 2025 09:49:11 -0400). The same commit also added
// the following line to src/index.ts, directly after the CsvRowTokenizer export:
//
//   export { CsvParser } from "./parser/CsvParser";

import { CsvDialect } from "../dialect";
import type {
  CsvParseResult,
  CsvParseResultArray,
  CsvParseResultObject,
  ICsvDialect,
  IChunkSource,
  ParseOptions,
  RowArray,
  RowObject,
} from "../types";
import { CsvRowTokenizer } from "./CsvRowTokenizer";

/**
 * Contract for a CSV parser that can read either an in-memory string or an
 * arbitrary chunk source.
 */
export interface ICsvParser {
  /**
   * Parses CSV text held entirely in memory.
   *
   * @param input - The CSV text.
   * @param options - Optional parse settings.
   * @returns The parsed result.
   */
  parseFromString(input: string, options?: ParseOptions): Promise<CsvParseResult>;

  /**
   * Parses CSV text delivered by a chunk source.
   *
   * @param source - Provider of the chunks to parse.
   * @param options - Optional parse settings.
   * @returns The parsed result.
   */
  parseFromSource(source: IChunkSource, options?: ParseOptions): Promise<CsvParseResult>;
}

/**
 * Default {@link ICsvParser} implementation.
 *
 * Every chunk of the source is pushed into a {@link CsvRowTokenizer}; the
 * tokenizer is then drained once, the rows are post-processed (empty-line
 * removal and row-length normalisation) and finally shaped into either an
 * array result or an object result depending on `options.output`.
 */
export class CsvParser implements ICsvParser {
  /**
   * Parses a CSV string by wrapping it in a `StringChunkSource` and delegating
   * to {@link CsvParser.parseFromSource}.
   *
   * The source module is loaded with a dynamic `import()`, so it is only
   * resolved when this method is actually called.
   *
   * @param input - The CSV text.
   * @param options - Optional parse settings.
   * @returns The parsed result.
   */
  public async parseFromString(input: string, options?: ParseOptions): Promise<CsvParseResult> {
    const { StringChunkSource } = await import("../sources/StringSource");
    return this.parseFromSource(new StringChunkSource(input), options);
  }

  /**
   * Parses everything the source yields.
   *
   * Output shape:
   * - `options.output === "object"`: rows are mapped to objects keyed by
   *   header (see {@link CsvParser.mapToObjects}).
   * - otherwise (the default, `"array"`): rows stay arrays; when
   *   `options.hasHeader` is truthy the first row is split off as `headers`.
   *
   * NOTE(review): the two modes use different defaults for a missing
   * `hasHeader` - array output treats it as `false`, object output as `true`.
   * That asymmetry is preserved here; confirm it is intentional.
   *
   * @param source - Provider of the chunks to parse.
   * @param options - Optional parse settings.
   * @returns The parsed result.
   */
  public async parseFromSource(source: IChunkSource, options?: ParseOptions): Promise<CsvParseResult> {
    const tokenizer = new CsvRowTokenizer(this.createDialect(options));

    for await (const chunk of source.chunks()) {
      tokenizer.pushChunk(chunk);
    }

    const rows = this.postProcessRows(tokenizer.drain(), options);

    if ((options?.output ?? "array") === "object") {
      return this.mapToObjects(rows, options);
    }

    const hasHeader = Boolean(options?.hasHeader);
    const result: CsvParseResultArray = {
      headers: hasHeader ? rows[0] : undefined,
      rows: hasHeader ? rows.slice(1) : rows,
    };
    return result;
  }

  /** Builds the dialect handed to the tokenizer from `options.dialect`. */
  private createDialect(options?: ParseOptions): ICsvDialect {
    return new CsvDialect(options?.dialect);
  }

  /**
   * Applies the row-level clean-up steps.
   *
   * 1. Unless `options.skipEmptyLines` is `false` (default `true`), rows that
   *    are empty or consist of a single empty field are dropped.
   * 2. Unless `options.validateRowLength` is `false` (default `true`), every
   *    row is brought to the length of the first remaining row: shorter rows
   *    are padded with `""`, longer rows are truncated.
   *
   * Neither the `rows` array nor any row inside it is mutated; rows that need
   * resizing are copied.
   *
   * NOTE(review): the first kept row defines the target width. With
   * `skipEmptyLines: false` and a blank first row, every later row is
   * truncated to a single column - confirm that this is acceptable.
   *
   * @param rows - Rows as produced by the tokenizer.
   * @param options - Optional parse settings.
   * @returns A new array holding the cleaned rows.
   */
  private postProcessRows(rows: RowArray[], options?: ParseOptions): RowArray[] {
    const validate = options?.validateRowLength ?? true;
    const skipEmpty = options?.skipEmptyLines ?? true;

    const kept = skipEmpty ? rows.filter((row) => !CsvParser.isBlankRow(row)) : rows.slice();

    const first = kept[0];
    if (!validate || first === undefined) return kept;

    const width = first.length;
    return kept.map((row) => CsvParser.fitToWidth(row, width));
  }

  /** True for a row with no fields or with exactly one empty-string field. */
  private static isBlankRow(row: RowArray): boolean {
    return row.length === 0 || (row.length === 1 && row[0] === "");
  }

  /**
   * Returns `row` itself when it already has `width` fields; otherwise a copy
   * truncated to `width` or right-padded with `""` up to `width`.
   */
  private static fitToWidth(row: RowArray, width: number): RowArray {
    if (row.length === width) return row;
    if (row.length > width) return row.slice(0, width);
    return [...row, ...new Array<string>(width - row.length).fill("")];
  }

  /**
   * Converts row arrays into objects keyed by header name.
   *
   * When `options.hasHeader` is `true` or missing, the first row supplies the
   * headers and is excluded from the data. When it is `false`, headers
   * `col0`, `col1`, ... are generated from the length of the first row and
   * every row is treated as data. Fields missing from a row become `""`;
   * fields beyond the header count are ignored.
   *
   * @param rows - Post-processed rows.
   * @param options - Optional parse settings.
   * @returns The headers together with one object per data row.
   */
  private mapToObjects(rows: RowArray[], options?: ParseOptions): CsvParseResultObject {
    const hasHeader = options?.hasHeader ?? true;
    const headers = hasHeader ? (rows[0] ?? []) : this.generateHeaders(rows[0]?.length ?? 0);
    const dataRows = hasHeader ? rows.slice(1) : rows;

    const objects = dataRows.map((row) => {
      const obj: RowObject = {};
      for (const [c, header] of headers.entries()) {
        // defineProperty instead of `obj[key] = value`: a plain assignment to a
        // header named "__proto__" goes through the inherited setter, which
        // ignores string values, so that column would silently vanish.
        Object.defineProperty(obj, header ?? `col${c}`, {
          value: row[c] ?? "",
          writable: true,
          enumerable: true,
          configurable: true,
        });
      }
      return obj;
    });

    return { headers, rows: objects };
  }

  /** Generates the positional header names `col0` ... `col{length - 1}`. */
  private generateHeaders(length: number): string[] {
    return Array.from({ length }, (_, i) => `col${i}`);
  }
}