From 10a8c54b70b2557578226c55485fcfcb95307202 Mon Sep 17 00:00:00 2001 From: Alexander Zinn Date: Wed, 20 Aug 2025 09:40:45 -0400 Subject: [PATCH] initial commit --- .../use-bun-instead-of-node-vite-npm-pnpm.mdc | 111 ++++++++++++++++++ .gitignore | 34 ++++++ .npmrc | 4 + .nvmrc | 1 + .prettierrc | 12 ++ README.md | 74 ++++++++++++ bunfig.toml | 5 + eslint.config.ts | 9 ++ package.json | 43 +++++++ src/dialect.ts | 17 +++ src/index.ts | 18 +++ src/parser/CsvRowTokenizer.ts | 97 +++++++++++++++ src/sources/FileSource.ts | 38 ++++++ src/sources/StringSource.ts | 19 +++ src/types.ts | 42 +++++++ test.__tmp.csv | 0 test/csv.spec.ts | 81 +++++++++++++ tsconfig.json | 36 ++++++ 18 files changed, 641 insertions(+) create mode 100644 .cursor/rules/use-bun-instead-of-node-vite-npm-pnpm.mdc create mode 100644 .gitignore create mode 100644 .npmrc create mode 100644 .nvmrc create mode 100644 .prettierrc create mode 100644 README.md create mode 100644 bunfig.toml create mode 100644 eslint.config.ts create mode 100644 package.json create mode 100644 src/dialect.ts create mode 100644 src/index.ts create mode 100644 src/parser/CsvRowTokenizer.ts create mode 100644 src/sources/FileSource.ts create mode 100644 src/sources/StringSource.ts create mode 100644 src/types.ts create mode 100644 test.__tmp.csv create mode 100644 test/csv.spec.ts create mode 100644 tsconfig.json diff --git a/.cursor/rules/use-bun-instead-of-node-vite-npm-pnpm.mdc b/.cursor/rules/use-bun-instead-of-node-vite-npm-pnpm.mdc new file mode 100644 index 0000000..b8100b7 --- /dev/null +++ b/.cursor/rules/use-bun-instead-of-node-vite-npm-pnpm.mdc @@ -0,0 +1,111 @@ +--- +description: Use Bun instead of Node.js, npm, pnpm, or vite. +globs: "*.ts, *.tsx, *.html, *.css, *.js, *.jsx, package.json" +alwaysApply: false +--- + +Default to using Bun instead of Node.js. 
+ +- Use `bun ` instead of `node ` or `ts-node ` +- Use `bun test` instead of `jest` or `vitest` +- Use `bun build ` instead of `webpack` or `esbuild` +- Use `bun install` instead of `npm install` or `yarn install` or `pnpm install` +- Use `bun run + + +``` + +With the following `frontend.tsx`: + +```tsx#frontend.tsx +import React from "react"; + +// import .css files directly and it works +import './index.css'; + +import { createRoot } from "react-dom/client"; + +const root = createRoot(document.body); + +export default function Frontend() { + return

<h1>Hello, world!</h1>

; +} + +root.render(); +``` + +Then, run index.ts + +```sh +bun --hot ./index.ts +``` + +For more information, read the Bun API docs in `node_modules/bun-types/docs/**.md`. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a14702c --- /dev/null +++ b/.gitignore @@ -0,0 +1,34 @@ +# dependencies (bun install) +node_modules + +# output +out +dist +*.tgz + +# code coverage +coverage +*.lcov + +# logs +logs +_.log +report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# caches +.eslintcache +.cache +*.tsbuildinfo + +# IntelliJ based IDEs +.idea + +# Finder (MacOS) folder config +.DS_Store diff --git a/.npmrc b/.npmrc new file mode 100644 index 0000000..a494ac2 --- /dev/null +++ b/.npmrc @@ -0,0 +1,4 @@ +//registry-node.techniker.me/:_authToken="${NODE_REGISTRY_AUTH_TOKEN}" +@techniker-me:registry=https://registry-node.techniker.me +save-exact=true +package-lock=false \ No newline at end of file diff --git a/.nvmrc b/.nvmrc new file mode 100644 index 0000000..2edeafb --- /dev/null +++ b/.nvmrc @@ -0,0 +1 @@ +20 \ No newline at end of file diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..caa814d --- /dev/null +++ b/.prettierrc @@ -0,0 +1,12 @@ +{ + "arrowParens": "avoid", + "bracketSameLine": true, + "bracketSpacing": false, + "printWidth": 160, + "semi": true, + "singleAttributePerLine": false, + "singleQuote": true, + "tabWidth": 2, + "trailingComma": "none", + "useTabs": false +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..d9a06b1 --- /dev/null +++ b/README.md @@ -0,0 +1,74 @@ +# @techniker-me/csv + +To install dependencies: + +```bash +bun install +``` + +To run: + +```bash +bun run src/index.ts +``` + +This project was created using `bun init` in bun v1.2.20. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime. 
+ +## Usage + +### Parse from string + +```ts +import { CsvParser } from "@techniker-me/csv"; + +const parser = new CsvParser(); +const csv = "name,age\nAlice,30\nBob,40"; +const result = await parser.parseFromString(csv, { + hasHeader: true, + output: "object", // "object" or "array" +}); + +// result.headers -> ["name", "age"] +// result.rows -> [{ name: "Alice", age: "30" }, { name: "Bob", age: "40" }] +``` + +### Parse from file + +```ts +import { CsvParser, FileChunkSource } from "@techniker-me/csv"; + +const parser = new CsvParser(); +const source = new FileChunkSource("/path/to/file.csv"); +const result = await parser.parseFromSource(source, { + hasHeader: true, + output: "array", +}); +``` + +### Dialect options + +```ts +import { CsvParser, CsvDialect } from "@techniker-me/csv"; + +const parser = new CsvParser(); +const result = await parser.parseFromString("a;b\n1;2", { + hasHeader: true, + dialect: { delimiter: ";", quote: '"', trimWhitespace: true }, +}); +``` + +### API + +- `CsvParser.parseFromString(input: string, options?: ParseOptions)` +- `CsvParser.parseFromSource(source: IChunkSource, options?: ParseOptions)` +- `FileChunkSource(path: string, chunkSize?: number)` +- `StringChunkSource(content: string, chunkSize?: number)` + +`ParseOptions`: + +- `dialect` (partial): `{ delimiter, quote, trimWhitespace }` +- `hasHeader`: treat first row as headers (default true for object mode, optional for array mode) +- `output`: `"object" | "array"` (default `"array"`) +- `validateRowLength`: pads/truncates rows to uniform length (default true) +- `skipEmptyLines`: ignore empty lines (default true) + diff --git a/bunfig.toml b/bunfig.toml new file mode 100644 index 0000000..ae22d90 --- /dev/null +++ b/bunfig.toml @@ -0,0 +1,5 @@ +[install.lockfile] +save = false + +[install.scopes] +"@techniker-me" = "https://registry-node.techniker.me" diff --git a/eslint.config.ts b/eslint.config.ts new file mode 100644 index 0000000..54d89d6 --- /dev/null +++ 
b/eslint.config.ts
@@ -0,0 +1,9 @@
+import js from "@eslint/js";
+import globals from "globals";
+import tseslint from "typescript-eslint";
+import { defineConfig } from "eslint/config";
+
+export default defineConfig([
+  { files: ["**/*.{js,mjs,cjs,ts,mts,cts}"], plugins: { js }, extends: ["js/recommended"], languageOptions: { globals: {...globals.browser, ...globals.node} } },
+  tseslint.configs.recommended,
+]);
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..37c5420
--- /dev/null
+++ b/package.json
@@ -0,0 +1,43 @@
+{
+  "name": "@techniker-me/csv",
+  "version": "0.0.0",
+  "module": "src/index.ts",
+  "type": "module",
+
+  "scripts": {
+    "ci-build": "bun run build:node && bun run build:browser && bun run build:types",
+    "format": "prettier --write ./",
+    "test": "bun test",
+    "test:watch": "bun test --watch",
+    "test:coverage": "bun test --coverage",
+    "prebuild": "bun run clean",
+    "prelint": "bun install",
+    "lint": "eslint src/**/*.ts",
+    "lint:fix": "eslint src/**/*.ts --fix",
+    "build": "bun run build:node && bun run build:browser && bun run build:types",
+    "build:node": "bun build src/index.ts --outdir dist/node --target node --format esm --production",
+    "build:browser": "bun build src/index.ts --outdir dist/browser --target browser --format esm --production",
+    "build:types": "tsc --emitDeclarationOnly --outDir dist/types",
+    "build:node:dev": "bun build src/index.ts --outdir dist/node --target node --format esm --development",
+    "build:browser:dev": "bun build src/index.ts --outdir dist/browser --target browser --format esm --development",
+    "build:types:dev": "tsc --emitDeclarationOnly --outDir dist/types",
+    "prebuild:dev": "bun run clean",
+    "build:dev": "bun run build:node:dev && bun run build:browser:dev && bun run build:types:dev",
+    "postclean": "bun run lint",
+    "clean": "rm -rf dist"
+  },
+  "devDependencies": {
+    "@eslint/js": "9.33.0",
+    "@types/bun": "latest",
+    "@types/node": "24.3.0",
+    "chai": "5.3.1",
+    "chai-as-promised": "8.0.1",
+    "eslint": "9.33.0",
+    "globals": "16.3.0",
+    "jiti": "2.5.1",
+    "mocha": "11.7.1",
+    "prettier": "3.6.2",
+    "typescript": "5.9.2",
+    "typescript-eslint": "8.40.0"
+  }
+}
diff --git a/src/dialect.ts b/src/dialect.ts
new file mode 100644
index 0000000..4e08094
--- /dev/null
+++ b/src/dialect.ts
@@ -0,0 +1,25 @@
+import type { CsvDialectOptions, ICsvDialect } from "./types";
+
+/**
+ * Immutable CSV dialect: field delimiter, quote character and whitespace trimming.
+ * Any option that is omitted falls back to `,`, `"` and `true` respectively.
+ */
+export class CsvDialect implements ICsvDialect {
+  public readonly delimiter: string;
+  public readonly quote: string;
+  public readonly trimWhitespace: boolean;
+
+  /** Dialect built from the defaults only. */
+  public static readonly Default: CsvDialect = new CsvDialect();
+
+  /**
+   * @param options Overrides for the defaults; `null`/`undefined` entries keep the default.
+   */
+  public constructor(options?: Partial<CsvDialectOptions>) {
+    this.delimiter = options?.delimiter ?? ",";
+    this.quote = options?.quote ?? '"';
+    this.trimWhitespace = options?.trimWhitespace ?? true;
+  }
+}
+
+
diff --git a/src/index.ts b/src/index.ts
new file mode 100644
index 0000000..5654690
--- /dev/null
+++ b/src/index.ts
@@ -0,0 +1,18 @@
+export type {
+  CsvParseResult,
+  CsvParseResultArray,
+  CsvParseResultObject,
+  ICsvDialect,
+  IChunkSource,
+  OutputMode,
+  ParseOptions,
+  RowArray,
+  RowObject,
+} from "./types";
+
+export { CsvDialect } from "./dialect";
+
+export { CsvParser } from "./parser/CsvParser";
+
+export { StringChunkSource } from "./sources/StringSource";
+export { FileChunkSource } from "./sources/FileSource";
\ No newline at end of file
diff --git a/src/parser/CsvRowTokenizer.ts b/src/parser/CsvRowTokenizer.ts
new file mode 100644
index 0000000..fceb466
--- /dev/null
+++ b/src/parser/CsvRowTokenizer.ts
@@ -0,0 +1,116 @@
+import type { ICsvDialect, RowArray } from "../types";
+
+/**
+ * Incremental CSV tokenizer. Text is fed in with `pushChunk` (chunks may split the
+ * input at any position) and finished rows are collected with `takeRows` or `drain`.
+ *
+ * The delimiter and quote are compared against one character at a time, so both
+ * must be single characters. With `trimWhitespace` every field is trimmed as a
+ * whole, including whitespace that was written inside quotes.
+ */
+export class CsvRowTokenizer {
+  private readonly dialect: ICsvDialect;
+
+  private readonly rows: RowArray[] = [];
+  private currentRow: RowArray = [];
+  private currentField = "";
+  private inQuotes = false;
+  /** A quote was read inside a quoted section; the next character decides whether it is an escaped quote or the closing one. */
+  private quotePending = false;
+  /** The current field had an opening quote, so it counts as present even when empty. */
+  private fieldQuoted = false;
+  /** The previous character was a CR that already ended a row. */
+  private lastWasCR = false;
+
+  public constructor(dialect: ICsvDialect) {
+    this.dialect = dialect;
+  }
+
+  /**
+   * Consumes the next piece of input and queues every row it completes.
+   * All parser state is kept between calls, so an escaped quote (`""`) or a CRLF
+   * pair may be split across two chunks.
+   */
+  public pushChunk(chunk: string): void {
+    const { delimiter, quote } = this.dialect;
+
+    for (const char of chunk) {
+      // Only an LF that directly follows a row-ending CR is part of a CRLF pair.
+      const followsCR = this.lastWasCR;
+      this.lastWasCR = false;
+
+      if (this.quotePending) {
+        this.quotePending = false;
+        if (char === quote) {
+          this.currentField += quote;
+          continue;
+        }
+        // The pending quote closed the quoted section; handle `char` as usual.
+        this.inQuotes = false;
+      }
+
+      if (this.inQuotes) {
+        if (char === quote) {
+          this.quotePending = true;
+        } else {
+          this.currentField += char;
+        }
+        continue;
+      }
+
+      if (char === quote) {
+        this.inQuotes = true;
+        this.fieldQuoted = true;
+      } else if (char === delimiter) {
+        this.flushField();
+      } else if (char === "\n") {
+        if (!followsCR) {
+          this.flushField();
+          this.flushRow();
+        }
+      } else if (char === "\r") {
+        this.flushField();
+        this.flushRow();
+        this.lastWasCR = true;
+      } else {
+        this.currentField += char;
+      }
+    }
+  }
+
+  /** Removes and returns the rows completed so far. */
+  public takeRows(): RowArray[] {
+    return this.rows.splice(0, this.rows.length);
+  }
+
+  /**
+   * Signals end of input and returns all remaining rows. A quoted section that is
+   * still open is treated as closed. A final row is emitted only when there is
+   * pending content, so input ending in a line break yields no extra empty row.
+   */
+  public drain(): RowArray[] {
+    this.quotePending = false;
+    this.inQuotes = false;
+    this.lastWasCR = false;
+
+    if (this.currentRow.length > 0 || this.currentField.length > 0 || this.fieldQuoted) {
+      this.flushField();
+      this.flushRow();
+    }
+    return this.takeRows();
+  }
+
+  private flushField(): void {
+    const value = this.dialect.trimWhitespace ? this.currentField.trim() : this.currentField;
+    this.currentRow.push(value);
+    this.currentField = "";
+    this.fieldQuoted = false;
+  }
+
+  private flushRow(): void {
+    this.rows.push(this.currentRow);
+    this.currentRow = [];
+  }
+}
+
+
diff --git a/src/sources/FileSource.ts b/src/sources/FileSource.ts
new file mode 100644
index 0000000..6a5e6b1
--- /dev/null
+++ b/src/sources/FileSource.ts
@@ -0,0 +1,60 @@
+import type { IChunkSource } from "../types";
+
+/**
+ * Chunk source that reads a file from disk and yields it as UTF-8 text.
+ * Uses `Bun.file` when running under Bun and `node:fs` streams otherwise;
+ * `chunkSize` only sizes the reads of the Node.js fallback.
+ */
+export class FileChunkSource implements IChunkSource {
+  private readonly path: string;
+  private readonly chunkSize: number;
+
+  /**
+   * @param path File to read.
+   * @param chunkSize Read size in bytes; values below 1024 are raised to 1024.
+   */
+  public constructor(path: string, chunkSize: number = 64 * 1024) {
+    this.path = path;
+    this.chunkSize = Math.max(1024, chunkSize);
+  }
+
+  public async *chunks(): AsyncIterable<string> {
+    // Prefer Bun if available; fall back to Node streams to keep portability
+    if (typeof Bun !== "undefined" && typeof Bun.file === "function") {
+      const reader = Bun.file(this.path).stream().getReader();
+      // A streaming decoder keeps a multi-byte character intact when its bytes are
+      // split across two reads. `ignoreBOM: true` leaves a leading BOM in the text
+      // instead of stripping it.
+      const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
+      let finished = false;
+      try {
+        while (true) {
+          const { value, done } = await reader.read();
+          if (done) break;
+          if (value) {
+            const text = decoder.decode(value, { stream: true });
+            if (text.length > 0) yield text;
+          }
+        }
+        finished = true;
+        // Flush whatever an incomplete trailing byte sequence left in the decoder.
+        const tail = decoder.decode();
+        if (tail.length > 0) yield tail;
+      } finally {
+        // Stop the underlying read when the consumer quit early or a read failed.
+        if (!finished) await reader.cancel().catch(() => undefined);
+        reader.releaseLock();
+      }
+      return;
+    }
+
+    // Node.js fallback
+    const { createReadStream } = await import("node:fs");
+    const stream = createReadStream(this.path, { encoding: "utf8", highWaterMark: this.chunkSize });
+    for await (const chunk of stream) {
+      yield chunk as string;
+    }
+  }
+}
+
+
diff --git a/src/sources/StringSource.ts b/src/sources/StringSource.ts
new file mode 100644
index 0000000..f94198c
--- /dev/null
+++ b/src/sources/StringSource.ts
@@ -0,0 +1,23 @@
+import type { IChunkSource } from "../types";
+
+/**
+ * Chunk source over an in-memory string, yielded in slices of `chunkSize`
+ * UTF-16 code units (values below 1024 are raised to 1024).
+ */
+export class StringChunkSource implements IChunkSource {
+  private readonly content: string;
+  private readonly chunkSize: number;
+
+  public constructor(content: string, chunkSize: number = 64 * 1024) {
+    this.content = content;
+    this.chunkSize = Math.max(1024, chunkSize);
+  }
+
+  public async *chunks(): AsyncIterable<string> {
+    for (let
offset = 0; offset < this.content.length; offset += this.chunkSize) {
+      yield this.content.slice(offset, offset + this.chunkSize);
+    }
+  }
+}
+
+
diff --git a/src/types.ts b/src/types.ts
new file mode 100644
index 0000000..b48b720
--- /dev/null
+++ b/src/types.ts
@@ -0,0 +1,48 @@
+/** Settings accepted when building a dialect. */
+export interface CsvDialectOptions {
+  delimiter: string;
+  quote: string;
+  trimWhitespace: boolean;
+}
+
+/** Read-only dialect consumed by the tokenizer. */
+export interface ICsvDialect {
+  readonly delimiter: string;
+  readonly quote: string;
+  readonly trimWhitespace: boolean;
+}
+
+/** One parsed row as an ordered list of field values. */
+export type RowArray = string[];
+/** One parsed row keyed by header name. */
+export type RowObject = Record<string, string>;
+
+export type OutputMode = "array" | "object";
+
+/** Options of a parse call; every entry is optional. */
+export interface ParseOptions {
+  dialect?: Partial<CsvDialectOptions>;
+  hasHeader?: boolean;
+  output?: OutputMode;
+  validateRowLength?: boolean;
+  skipEmptyLines?: boolean;
+}
+
+export interface CsvParseResultArray {
+  headers?: string[];
+  rows: RowArray[];
+}
+
+export interface CsvParseResultObject {
+  headers: string[];
+  rows: RowObject[];
+}
+
+export type CsvParseResult = CsvParseResultArray | CsvParseResultObject;
+
+/** Supplier of input text, delivered as an async sequence of string chunks. */
+export interface IChunkSource {
+  chunks(): AsyncIterable<string>;
+}
+
+
diff --git a/test.__tmp.csv b/test.__tmp.csv
new file mode 100644
index 0000000..e69de29
diff --git a/test/csv.spec.ts b/test/csv.spec.ts
new file mode 100644
index 0000000..2a2ad9f
--- /dev/null
+++ b/test/csv.spec.ts
@@ -0,0 +1,81 @@
+import { describe, it, expect } from "bun:test";
+import { CsvParser, FileChunkSource } from "../src/index.ts";
+
+describe("CsvParser - string input", () => {
+  it("parses simple CSV with headers to objects", async () => {
+    const parser = new CsvParser();
+    const csv = "name,age\nAlice,30\nBob,40";
+    const result = await parser.parseFromString(csv, { hasHeader: true, output: "object" });
+
+    expect(result.headers).toEqual(["name", "age"]);
+    expect(result.rows).toEqual([
+      { name: "Alice", age: "30" },
+      { name: "Bob", age: "40" },
+    ]);
+  });
+
+  it("handles quoted fields with commas and escaped quotes", async () => {
+    const parser =
new CsvParser();
+    const csv = [
+      '"name","note"',
+      '"Alice","He said ""hello"", world"',
+      '"Bob","x,y,z"',
+    ].join("\n");
+
+    const result = await parser.parseFromString(csv, { hasHeader: true, output: "object" });
+    expect(result.rows).toEqual([
+      { name: "Alice", note: 'He said "hello", world' },
+      { name: "Bob", note: "x,y,z" },
+    ]);
+  });
+
+  it("supports CRLF line endings", async () => {
+    const parser = new CsvParser();
+    const csv = "a,b\r\n1,2\r\n3,4";
+    const result = await parser.parseFromString(csv, { hasHeader: true });
+    expect(result.headers).toEqual(["a", "b"]);
+    expect(result.rows).toEqual([
+      ["1", "2"],
+      ["3", "4"],
+    ]);
+  });
+
+  it("skips empty lines by default", async () => {
+    const parser = new CsvParser();
+    const csv = "a,b\n\n1,2\n\n";
+    const result = await parser.parseFromString(csv, { hasHeader: true });
+    expect(result.rows).toEqual([["1", "2"]]);
+  });
+
+  it("normalizes row length when validateRowLength is true (default)", async () => {
+    const parser = new CsvParser();
+    const csv = "a,b,c\n1,2\n3,4,5";
+    const result = await parser.parseFromString(csv, { hasHeader: true });
+    expect(result.rows).toEqual([
+      ["1", "2", ""],
+      ["3", "4", "5"],
+    ]);
+  });
+});
+
+describe("CsvParser - file input", () => {
+  it("parses a CSV file via FileChunkSource", async () => {
+    const parser = new CsvParser();
+    const content = "city,temp\nBerlin,12\nMunich,10";
+    const tmpPath = `${process.cwd()}/test.__tmp.csv`;
+    await Bun.write(tmpPath, content);
+    try {
+      const source = new FileChunkSource(tmpPath);
+      const result = await parser.parseFromSource(source, { hasHeader: true, output: "object" });
+      expect(result.rows).toEqual([
+        { city: "Berlin", temp: "12" },
+        { city: "Munich", temp: "10" },
+      ]);
+    } finally {
+      // best-effort cleanup: remove the temp file instead of leaving an empty one behind
+      try { await Bun.file(tmpPath).delete(); } catch {}
+    }
+  });
+});
+
+
diff --git a/tsconfig.json b/tsconfig.json
new file mode 100644
index 0000000..9ef9612
--- /dev/null
+++ b/tsconfig.json
@@ -0,0 +1,36
@@ +{ + "compilerOptions": { + // Environment setup & latest features + "lib": ["ESNext"], + "target": "ESNext", + "module": "Preserve", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + + // Bundler mode + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "emitDeclarationOnly": true, + "declaration": true, + "declarationDir": "./dist/types", + "outDir": "./dist/node", + "rootDir": "./src", + "declarationMap": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedIndexedAccess": true, + "noImplicitOverride": true, + + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + "noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + }, + "include": ["src"], + "exclude": ["test", "dist", "node_modules"] +}