commit 10a8c54b70b2557578226c55485fcfcb95307202 Author: Alexander Zinn Date: Wed Aug 20 09:40:45 2025 -0400 initial commit diff --git a/.cursor/rules/use-bun-instead-of-node-vite-npm-pnpm.mdc b/.cursor/rules/use-bun-instead-of-node-vite-npm-pnpm.mdc new file mode 100644 index 0000000..b8100b7 --- /dev/null +++ b/.cursor/rules/use-bun-instead-of-node-vite-npm-pnpm.mdc @@ -0,0 +1,111 @@ +--- +description: Use Bun instead of Node.js, npm, pnpm, or vite. +globs: "*.ts, *.tsx, *.html, *.css, *.js, *.jsx, package.json" +alwaysApply: false +--- + +Default to using Bun instead of Node.js. + +- Use `bun ` instead of `node ` or `ts-node ` +- Use `bun test` instead of `jest` or `vitest` +- Use `bun build ` instead of `webpack` or `esbuild` +- Use `bun install` instead of `npm install` or `yarn install` or `pnpm install` +- Use `bun run + + +``` + +With the following `frontend.tsx`: + +```tsx#frontend.tsx +import React from "react"; + +// import .css files directly and it works +import './index.css'; + +import { createRoot } from "react-dom/client"; + +const root = createRoot(document.body); + +export default function Frontend() { + return

Hello, world!

; +} + +root.render(); +``` + +Then, run index.ts + +```sh +bun --hot ./index.ts +``` + +For more information, read the Bun API docs in `node_modules/bun-types/docs/**.md`. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a14702c --- /dev/null +++ b/.gitignore @@ -0,0 +1,34 @@ +# dependencies (bun install) +node_modules + +# output +out +dist +*.tgz + +# code coverage +coverage +*.lcov + +# logs +logs +_.log +report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# caches +.eslintcache +.cache +*.tsbuildinfo + +# IntelliJ based IDEs +.idea + +# Finder (MacOS) folder config +.DS_Store diff --git a/.npmrc b/.npmrc new file mode 100644 index 0000000..a494ac2 --- /dev/null +++ b/.npmrc @@ -0,0 +1,4 @@ +//registry-node.techniker.me/:_authToken="${NODE_REGISTRY_AUTH_TOKEN}" +@techniker-me:registry=https://registry-node.techniker.me +save-exact=true +package-lock=false \ No newline at end of file diff --git a/.nvmrc b/.nvmrc new file mode 100644 index 0000000..2edeafb --- /dev/null +++ b/.nvmrc @@ -0,0 +1 @@ +20 \ No newline at end of file diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..caa814d --- /dev/null +++ b/.prettierrc @@ -0,0 +1,12 @@ +{ + "arrowParens": "avoid", + "bracketSameLine": true, + "bracketSpacing": false, + "printWidth": 160, + "semi": true, + "singleAttributePerLine": false, + "singleQuote": true, + "tabWidth": 2, + "trailingComma": "none", + "useTabs": false +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..d9a06b1 --- /dev/null +++ b/README.md @@ -0,0 +1,74 @@ +# @techniker-me/csv + +To install dependencies: + +```bash +bun install +``` + +To run: + +```bash +bun run src/index.ts +``` + +This project was created using `bun init` in bun v1.2.20. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime. 
+ +## Usage + +### Parse from string + +```ts +import { CsvParser } from "@techniker-me/csv"; + +const parser = new CsvParser(); +const csv = "name,age\nAlice,30\nBob,40"; +const result = await parser.parseFromString(csv, { + hasHeader: true, + output: "object", // "object" or "array" +}); + +// result.headers -> ["name", "age"] +// result.rows -> [{ name: "Alice", age: "30" }, { name: "Bob", age: "40" }] +``` + +### Parse from file + +```ts +import { CsvParser, FileChunkSource } from "@techniker-me/csv"; + +const parser = new CsvParser(); +const source = new FileChunkSource("/path/to/file.csv"); +const result = await parser.parseFromSource(source, { + hasHeader: true, + output: "array", +}); +``` + +### Dialect options + +```ts +import { CsvParser, CsvDialect } from "@techniker-me/csv"; + +const parser = new CsvParser(); +const result = await parser.parseFromString("a;b\n1;2", { + hasHeader: true, + dialect: { delimiter: ";", quote: '"', trimWhitespace: true }, +}); +``` + +### API + +- `CsvParser.parseFromString(input: string, options?: ParseOptions)` +- `CsvParser.parseFromSource(source: IChunkSource, options?: ParseOptions)` +- `FileChunkSource(path: string, chunkSize?: number)` +- `StringChunkSource(content: string, chunkSize?: number)` + +`ParseOptions`: + +- `dialect` (partial): `{ delimiter, quote, trimWhitespace }` +- `hasHeader`: treat first row as headers (default true for object mode, optional for array mode) +- `output`: `"object" | "array"` (default `"array"`) +- `validateRowLength`: pads/truncates rows to uniform length (default true) +- `skipEmptyLines`: ignore empty lines (default true) + diff --git a/bunfig.toml b/bunfig.toml new file mode 100644 index 0000000..ae22d90 --- /dev/null +++ b/bunfig.toml @@ -0,0 +1,5 @@ +[install.lockfile] +save = false + +[install.scopes] +"@techniker-me" = "https://registry-node.techniker.me" diff --git a/eslint.config.ts b/eslint.config.ts new file mode 100644 index 0000000..54d89d6 --- /dev/null +++ 
b/eslint.config.ts @@ -0,0 +1,9 @@ +import js from "@eslint/js"; +import globals from "globals"; +import tseslint from "typescript-eslint"; +import { defineConfig } from "eslint/config"; + +export default defineConfig([ + { files: ["**/*.{js,mjs,cjs,ts,mts,cts}"], plugins: { js }, extends: ["js/recommended"], languageOptions: { globals: {...globals.browser, ...globals.node} } }, + tseslint.configs.recommended, +]); diff --git a/package.json b/package.json new file mode 100644 index 0000000..37c5420 --- /dev/null +++ b/package.json @@ -0,0 +1,43 @@ +{ + "name": "@techniker-me/csv", + "version": "0.0.0", + "module": "src/index.ts", + "type": "module", + + "scripts": { + "ci-build": "bun run build:node && bun run build:browser && bun run build:types", + "format": "prettier --write ./", + "test": "bun test", + "test:watch": "bun test --watch", + "test:coverage": "bun test --coverage", + "prebuild": "bun run clean", + "prelint": "bun install", + "lint": "eslint src/**/*.ts", + "lint:fix": "eslint src/**/*.ts --fix", + "build": "bun run build:node && bun run build:browser && bun run build:types", + "build:node": "bun build src/index.ts --outdir dist/node --target node --format esm --production", + "build:browser": "bun build src/index.ts --outdir dist/browser --target browser --format esm --production", + "build:types": "tsc --emitDeclarationOnly --outDir dist/types", + "build:node:dev": "bun build src/index.ts --outdir dist/node --target node --format esm --development", + "build:browser:dev": "bun build src/index.ts --outdir dist/browser --target browser --format esm --development", + "build:types:dev": "tsc --emitDeclarationOnly --outDir dist/types", + "prebuild:dev": "bun run clean", + "build:dev": "bun run build:node:dev && bun run build:browser:dev && bun run build:types:dev", + "postclean": "bun run lint", + "clean": "rm -rf dist" + }, + "devDependencies": { + "@eslint/js": "9.33.0", + "@types/bun": "latest", + "@types/node": "24.3.0", + "chai": "5.3.1", + 
/**
 * Incremental, chunk-oriented CSV tokenizer.
 *
 * Feed arbitrary slices of the input with {@link pushChunk}; completed rows
 * accumulate internally and are handed out by {@link takeRows}. Call
 * {@link drain} once the input is exhausted to flush the final, possibly
 * unterminated, row.
 *
 * All parsing state lives on the instance, so a quoted field, an escaped
 * quote (`""`) or a CRLF pair may be split across two chunks without
 * changing the result.
 */
export class CsvRowTokenizer {
  private readonly dialect: ICsvDialect;

  /** Completed rows that have not been handed out yet. */
  private readonly rows: RowArray[] = [];
  /** Fields collected so far for the row being built. */
  private currentRow: RowArray = [];
  /** Raw text of the field being built. */
  private currentField = "";
  /** True while inside a quoted field. */
  private inQuotes = false;
  /** True only when the immediately preceding unquoted character was `\r`. */
  private lastWasCR = false;
  /**
   * True when the previous character was a quote seen inside a quoted field.
   * Whether it closes the field or starts an escaped quote is decided by the
   * next character, which may arrive in a later chunk.
   */
  private quotePending = false;
  /** True once the current field has seen an opening quote (keeps a trailing `""` field alive in drain). */
  private fieldQuoted = false;

  /**
   * @param dialect Delimiter, quote character and trimming behaviour to apply.
   */
  public constructor(dialect: ICsvDialect) {
    this.dialect = dialect;
  }

  /**
   * Consumes the next slice of input.
   *
   * @param chunk Any portion of the CSV text; chunk boundaries may fall anywhere.
   */
  public pushChunk(chunk: string): void {
    const { delimiter, quote } = this.dialect;

    for (let index = 0; index < chunk.length; index += 1) {
      const char = chunk.charAt(index);

      if (this.quotePending) {
        this.quotePending = false;
        if (char === quote) {
          // Doubled quote inside a quoted field: a literal quote character.
          this.currentField += quote;
          continue;
        }
        // The pending quote closed the field; handle `char` as unquoted input below.
        this.inQuotes = false;
      }

      if (this.inQuotes) {
        if (char === quote) {
          // Defer the decision until the next character is known.
          this.quotePending = true;
        } else {
          this.currentField += char;
        }
        continue;
      }

      // Only a LF that directly follows a CR is part of a CRLF pair.
      const followsCR = this.lastWasCR;
      this.lastWasCR = false;

      if (char === quote) {
        this.inQuotes = true;
        this.fieldQuoted = true;
        continue;
      }

      if (char === delimiter) {
        this.flushField();
        continue;
      }

      if (char === "\n") {
        if (!followsCR) {
          this.flushField();
          this.flushRow();
        }
        continue;
      }

      if (char === "\r") {
        this.flushField();
        this.flushRow();
        this.lastWasCR = true;
        continue;
      }

      this.currentField += char;
    }
  }

  /**
   * Removes and returns every row completed so far.
   *
   * @returns The completed rows in input order; empty when none are ready.
   */
  public takeRows(): RowArray[] {
    return this.rows.splice(0, this.rows.length);
  }

  /**
   * Signals end of input, flushes a final unterminated row if there is one,
   * and returns all remaining rows.
   *
   * An unterminated quoted field is treated as closed at end of input. No row
   * is emitted when nothing follows the last line terminator.
   *
   * @returns The remaining rows in input order.
   */
  public drain(): RowArray[] {
    // End of input closes any open or half-closed quoted field.
    this.quotePending = false;
    this.inQuotes = false;
    this.lastWasCR = false;

    // Decide before flushing the field: flushing always adds an entry to the row.
    const hasPendingContent = this.currentRow.length > 0 || this.currentField.length > 0 || this.fieldQuoted;
    if (hasPendingContent) {
      this.flushField();
      this.flushRow();
    }
    return this.takeRows();
  }

  /** Finishes the current field (trimmed when the dialect asks for it) and appends it to the current row. */
  private flushField(): void {
    const value = this.dialect.trimWhitespace ? this.currentField.trim() : this.currentField;
    this.currentRow.push(value);
    this.currentField = "";
    this.fieldQuoted = false;
  }

  /** Moves the current row to the completed list and starts a new one. */
  private flushRow(): void {
    this.rows.push(this.currentRow);
    this.currentRow = [];
  }
}
/** Settings accepted when constructing a CSV dialect. */
export interface CsvDialectOptions {
  /** Character that separates fields. */
  delimiter: string;
  /** Character that encloses quoted fields. */
  quote: string;
  /** Whether surrounding whitespace is trimmed from each field. */
  trimWhitespace: boolean;
}

/** Read-only view of a fully resolved CSV dialect. */
export interface ICsvDialect {
  readonly delimiter: string;
  readonly quote: string;
  readonly trimWhitespace: boolean;
}

/** One parsed row as an ordered list of field values. */
export type RowArray = string[];
/** One parsed row keyed by header name. */
export type RowObject = Record<string, string>;

/** Shape of the rows in a parse result. */
export type OutputMode = "array" | "object";

/** Options accepted by the parser entry points. */
export interface ParseOptions {
  /** Overrides for individual dialect settings; unspecified settings keep their defaults. */
  dialect?: Partial<CsvDialectOptions>;
  /** Treat the first row as the header row. */
  hasHeader?: boolean;
  /** Row shape to produce. */
  output?: OutputMode;
  /** Pad or truncate rows to a uniform length. */
  validateRowLength?: boolean;
  /** Ignore empty lines. */
  skipEmptyLines?: boolean;
}

/** Parse result whose rows are arrays; headers are present only when a header row was read. */
export interface CsvParseResultArray {
  headers?: string[];
  rows: RowArray[];
}

/** Parse result whose rows are objects keyed by header name. */
export interface CsvParseResultObject {
  headers: string[];
  rows: RowObject[];
}

/** Either flavour of parse result. */
export type CsvParseResult = CsvParseResultArray | CsvParseResultObject;

/** A source of CSV text delivered as a sequence of string chunks. */
export interface IChunkSource {
  /** Yields the content piece by piece, in order. */
  chunks(): AsyncIterable<string>;
}
new CsvParser(); + const csv = [ + '"name","note"', + '"Alice","He said ""hello"", world"', + '"Bob","x,y,z"', + ].join("\n"); + + const result = await parser.parseFromString(csv, { hasHeader: true, output: "object" }); + expect(result.rows).toEqual([ + { name: "Alice", note: 'He said "hello", world' }, + { name: "Bob", note: "x,y,z" }, + ]); + }); + + it("supports CRLF line endings", async () => { + const parser = new CsvParser(); + const csv = "a,b\r\n1,2\r\n3,4"; + const result = await parser.parseFromString(csv, { hasHeader: true }); + expect(result.headers).toEqual(["a", "b"]); + expect(result.rows).toEqual([ + ["1", "2"], + ["3", "4"], + ]); + }); + + it("skips empty lines by default", async () => { + const parser = new CsvParser(); + const csv = "a,b\n\n1,2\n\n"; + const result = await parser.parseFromString(csv, { hasHeader: true }); + expect(result.rows).toEqual([["1", "2"]]); + }); + + it("normalizes row length when validateRowLength is true (default)", async () => { + const parser = new CsvParser(); + const csv = "a,b,c\n1,2\n3,4,5"; + const result = await parser.parseFromString(csv, { hasHeader: true }); + expect(result.rows).toEqual([ + ["1", "2", ""], + ["3", "4", "5"], + ]); + }); +}); + +describe("CsvParser - file input", () => { + it("parses a CSV file via FileChunkSource", async () => { + const parser = new CsvParser(); + const content = "city,temp\nBerlin,12\nMunich,10"; + const tmpPath = `${process.cwd()}/test.__tmp.csv`; + await Bun.write(tmpPath, content); + try { + const source = new FileChunkSource(tmpPath); + const result = await parser.parseFromSource(source, { hasHeader: true, output: "object" }); + expect(result.rows).toEqual([ + { city: "Berlin", temp: "12" }, + { city: "Munich", temp: "10" }, + ]); + } finally { + // best-effort cleanup + try { await Bun.write(tmpPath, ""); } catch {} + } + }); +}); + + diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..9ef9612 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,36 
@@ +{ + "compilerOptions": { + // Environment setup & latest features + "lib": ["ESNext"], + "target": "ESNext", + "module": "Preserve", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + + // Bundler mode + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "emitDeclarationOnly": true, + "declaration": true, + "declarationDir": "./dist/types", + "outDir": "./dist/node", + "rootDir": "./src", + "declarationMap": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedIndexedAccess": true, + "noImplicitOverride": true, + + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + "noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + }, + "include": ["src"], + "exclude": ["test", "dist", "node_modules"] +}