// 107 lines · 3.2 KiB · TypeScript
import { CsvDialect } from "../dialect";
|
|
import type {
|
|
CsvParseResult,
|
|
CsvParseResultArray,
|
|
CsvParseResultObject,
|
|
ICsvDialect,
|
|
IChunkSource,
|
|
ParseOptions,
|
|
RowArray,
|
|
RowObject,
|
|
} from "../types";
|
|
import { CsvRowTokenizer } from "./CsvRowTokenizer";
|
|
|
|
/**
 * Contract for CSV parsers: both entry points resolve to a `CsvParseResult`
 * and accept the same optional `ParseOptions`.
 */
export interface ICsvParser {
  /**
   * Parses CSV text that is already fully available as a string.
   *
   * @param input - The CSV text to parse.
   * @param options - Optional parse settings.
   * @returns A promise resolving to the parse result.
   */
  parseFromString(input: string, options?: ParseOptions): Promise<CsvParseResult>;

  /**
   * Parses CSV text delivered by a chunk source.
   *
   * @param source - Provider of the input chunks.
   * @param options - Optional parse settings.
   * @returns A promise resolving to the parse result.
   */
  parseFromSource(source: IChunkSource, options?: ParseOptions): Promise<CsvParseResult>;
}
|
|
|
|
/**
 * CSV parser that pushes every chunk of a source through a `CsvRowTokenizer`,
 * post-processes the drained rows (empty-line skipping, row-length
 * normalisation) and returns them either as arrays or as keyed objects.
 */
export class CsvParser implements ICsvParser {
  /**
   * Parses an in-memory CSV string.
   *
   * The string source module is loaded with a dynamic `import()` and the work
   * is then delegated to {@link CsvParser.parseFromSource}.
   *
   * @param input - The CSV text to parse.
   * @param options - Optional parse settings, forwarded unchanged.
   * @returns The parse result produced by `parseFromSource`.
   */
  public async parseFromString(input: string, options?: ParseOptions): Promise<CsvParseResult> {
    const { StringChunkSource } = await import("../sources/StringSource");
    return this.parseFromSource(new StringChunkSource(input), options);
  }

  /**
   * Parses CSV delivered chunk by chunk.
   *
   * All chunks are pushed into the tokenizer before any row is drained, so
   * the whole input is consumed before post-processing starts.
   *
   * @param source - Provider of the input chunks (`source.chunks()` is iterated with `for await`).
   * @param options - Optional parse settings. `output` selects `"array"` (default) or `"object"` results.
   * @returns Array-shaped or object-shaped parse result, depending on `options.output`.
   */
  public async parseFromSource(source: IChunkSource, options?: ParseOptions): Promise<CsvParseResult> {
    const tokenizer = new CsvRowTokenizer(this.createDialect(options));

    for await (const chunk of source.chunks()) {
      tokenizer.pushChunk(chunk);
    }

    const rows = this.postProcessRows(tokenizer.drain(), options);

    if ((options?.output ?? "array") === "object") {
      return this.mapToObjects(rows, options);
    }

    // NOTE: array output only treats the first row as a header when
    // `hasHeader` is explicitly truthy, whereas object output (mapToObjects)
    // defaults `hasHeader` to true. Kept as-is for backward compatibility.
    const hasHeader = Boolean(options?.hasHeader);
    const result: CsvParseResultArray = {
      headers: hasHeader ? rows[0] : undefined,
      rows: hasHeader ? rows.slice(1) : rows,
    };
    return result;
  }

  /**
   * Builds the dialect handed to the tokenizer from `options.dialect`.
   *
   * @param options - Optional parse settings; only `dialect` is read here.
   * @returns A new `CsvDialect` instance.
   */
  private createDialect(options?: ParseOptions): ICsvDialect {
    return new CsvDialect(options?.dialect);
  }

  /**
   * Filters and normalises tokenized rows without mutating the input.
   *
   * - With `skipEmptyLines` (default `true`), rows that are empty or consist
   *   of a single empty-string cell are dropped.
   * - With `validateRowLength` (default `true`), every remaining row is padded
   *   with `""` or truncated to the length of the first remaining row.
   *
   * @param rows - Rows as drained from the tokenizer; never modified.
   * @param options - Optional parse settings (`skipEmptyLines`, `validateRowLength`).
   * @returns A new array of rows; rows needing padding/truncation are copies.
   */
  private postProcessRows(rows: RowArray[], options?: ParseOptions): RowArray[] {
    const validate = options?.validateRowLength ?? true;
    const skipEmpty = options?.skipEmptyLines ?? true;

    const kept = skipEmpty
      ? rows.filter((row) => row.length > 1 || (row.length === 1 && row[0] !== ""))
      : rows.slice();

    const first = kept[0];
    if (!validate || first === undefined) return kept;

    const width = first.length;
    return kept.map((row) => {
      if (row.length === width) return row;
      // Work on a copy so the caller's (tokenizer's) arrays stay untouched.
      if (row.length > width) return row.slice(0, width);
      const padded = row.slice();
      while (padded.length < width) padded.push("");
      return padded;
    });
  }

  /**
   * Converts rows into objects keyed by header name.
   *
   * `hasHeader` defaults to `true` here: the first row supplies the keys and
   * is excluded from the data. Otherwise `col0..colN` keys are generated from
   * the width of the first row. Missing cells become `""`; cells beyond the
   * header count are ignored.
   *
   * @param rows - Post-processed rows.
   * @param options - Optional parse settings; only `hasHeader` is read here.
   * @returns The headers used and one object per data row.
   */
  private mapToObjects(rows: RowArray[], options?: ParseOptions): CsvParseResultObject {
    const hasHeader = options?.hasHeader ?? true;
    const headers = hasHeader ? rows[0] ?? [] : this.generateHeaders(rows[0]?.length ?? 0);
    const dataRows = hasHeader ? rows.slice(1) : rows;
    const objects: RowObject[] = [];

    for (const row of dataRows) {
      const obj: RowObject = {};
      for (let c = 0; c < headers.length; c += 1) {
        // defineProperty instead of `obj[key] = ...`: a header literally named
        // "__proto__" would otherwise hit the inherited setter and the column
        // would silently vanish. For every other key this is equivalent to a
        // plain assignment (own, writable, enumerable, configurable).
        Object.defineProperty(obj, headers[c] ?? `col${c}`, {
          value: row[c] ?? "",
          writable: true,
          enumerable: true,
          configurable: true,
        });
      }
      objects.push(obj);
    }

    return { headers, rows: objects };
  }

  /**
   * Generates positional header names `col0`, `col1`, ….
   *
   * @param length - Number of headers to generate.
   * @returns An array of `length` generated names.
   */
  private generateHeaders(length: number): string[] {
    return Array.from({ length }, (_, i) => `col${i}`);
  }
}
|
|
|
|
|