| | """
|
| | Repository Tech-Stack Scanner
|
| | ===============================
|
| | Scans a repository and produces a Markdown table of file types with counts
|
| | and total sizes in KB. Extensions without a known label are listed by their upper-cased name.
|
| |
|
| | Usage:
|
| | python scan_techstack.py [path] [--output FILE] [--ignore PATTERN ...]
|
| | [--no-default-ignore] [--exclude-ext EXT ...] [--sort count|size|ext]
|
| |
|
| | Output:
|
| | A Markdown file containing the tech-stack audit table.
|
| | """
|
| |
|
import argparse
import fnmatch
import os
import sys
from collections import defaultdict
from pathlib import Path
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
# Maps a lower-cased file suffix (including the leading dot) to a
# human-readable label.  Every key appears exactly once: a dict literal
# silently keeps only the LAST value of a repeated key, which previously made
# ".ts" resolve to "MPEG-TS" instead of "TypeScript".  Suffixes that are
# genuinely shared by two formats carry a combined "A / B" label.
EXT_LABELS: dict[str, str] = {
    # --- Programming languages ---
    ".py": "Python",
    ".pyw": "Python (windowed)",
    ".pyx": "Cython",
    ".pxd": "Cython Definition",
    ".pyi": "Python Stub",
    ".pyc": "Python Compiled",
    ".pyo": "Python Optimized",
    ".js": "JavaScript",
    ".mjs": "ES Module JavaScript",
    ".cjs": "CommonJS JavaScript",
    ".jsx": "JSX (React)",
    # MPEG transport streams share this suffix; in a source repository
    # TypeScript is the relevant meaning.
    ".ts": "TypeScript",
    ".tsx": "TSX (React)",
    ".java": "Java",
    ".class": "Java Class",
    ".jar": "Java Archive",
    ".kt": "Kotlin",
    ".kts": "Kotlin Script",
    ".scala": "Scala",
    ".groovy": "Groovy",
    ".c": "C",
    ".h": "C Header",
    ".cpp": "C++",
    ".cxx": "C++",
    ".cc": "C++",
    ".c++": "C++",
    ".hpp": "C++ Header",
    ".hxx": "C++ Header",
    ".hh": "C++ Header",
    ".h++": "C++ Header",
    ".cs": "C#",
    ".csx": "C# Script",
    ".fs": "F#",
    ".fsx": "F# Script",
    ".fsi": "F# Signature",
    ".vb": "Visual Basic",
    ".vbs": "VBScript",
    ".go": "Go",
    ".rs": "Rust",
    ".rb": "Ruby",
    ".erb": "Embedded Ruby",
    ".php": "PHP",
    ".pl": "Perl",
    ".pm": "Perl Module",
    ".lua": "Lua",
    ".r": "R",
    ".rmd": "R Markdown",
    ".swift": "Swift",
    ".m": "Objective-C / MATLAB",
    ".mm": "Objective-C++",
    ".d": "D",
    ".dart": "Dart",
    ".ex": "Elixir",
    ".exs": "Elixir Script",
    ".erl": "Erlang",
    ".hrl": "Erlang Header",
    ".hs": "Haskell",
    ".lhs": "Literate Haskell",
    ".ml": "OCaml",
    ".mli": "OCaml Interface",
    ".clj": "Clojure",
    ".cljs": "ClojureScript",
    ".cljc": "Clojure Common",
    ".edn": "EDN (Clojure)",
    ".lisp": "Common Lisp",
    ".cl": "Common Lisp",
    ".el": "Emacs Lisp",
    ".scm": "Scheme",
    ".rkt": "Racket",
    ".nim": "Nim",
    ".zig": "Zig",
    ".v": "V / Verilog",
    ".vhd": "VHDL / Virtual Hard Disk",
    ".vhdl": "VHDL",
    ".sv": "SystemVerilog",
    ".asm": "Assembly",
    ".s": "Assembly",
    ".f": "Fortran",
    ".f90": "Fortran 90",
    ".f95": "Fortran 95",
    ".f03": "Fortran 2003",
    ".f08": "Fortran 2008",
    ".for": "Fortran",
    ".cob": "COBOL",
    ".cbl": "COBOL",
    ".pas": "Pascal",
    ".pp": "Pascal",
    ".ada": "Ada",
    ".adb": "Ada Body",
    ".ads": "Ada Spec",
    ".tcl": "Tcl",
    ".tk": "Tcl/Tk",
    ".awk": "AWK",
    ".sed": "sed",
    ".jl": "Julia",
    ".cr": "Crystal",
    ".hack": "Hack",
    ".hx": "Haxe",
    ".hxml": "Haxe XML",
    ".purs": "PureScript",
    ".idr": "Idris",
    ".agda": "Agda",
    ".lean": "Lean",
    ".coq": "Coq",
    ".thy": "Isabelle",
    ".pro": "Prolog",
    ".sml": "Standard ML",
    ".fig": "MATLAB Figure / Figma",
    ".mat": "MATLAB Data",

    # --- Shell and scripting ---
    ".sh": "Shell Script",
    ".bash": "Bash Script",
    ".zsh": "Zsh Script",
    ".fish": "Fish Script",
    ".bat": "Batch File",
    ".cmd": "Windows Command",
    ".ps1": "PowerShell",
    ".psm1": "PowerShell Module",
    ".psd1": "PowerShell Data",

    # --- Web and templates ---
    ".html": "HTML",
    ".htm": "HTML",
    ".xhtml": "XHTML",
    ".css": "CSS",
    ".scss": "SCSS",
    ".sass": "Sass",
    ".less": "Less",
    ".styl": "Stylus",
    ".vue": "Vue",
    ".svelte": "Svelte",
    ".astro": "Astro",
    ".ejs": "EJS Template",
    ".pug": "Pug Template",
    ".jade": "Jade Template",
    ".hbs": "Handlebars",
    ".mustache": "Mustache",
    ".twig": "Twig Template",
    ".jinja": "Jinja Template",
    ".jinja2": "Jinja2 Template",
    ".j2": "Jinja2 Template",
    ".njk": "Nunjucks Template",
    ".liquid": "Liquid Template",
    ".haml": "HAML",
    ".slim": "Slim Template",
    # NOTE: compound suffix. A lookup keyed on os.path.splitext() only ever
    # sees ".php" for such files, so this entry needs a multi-suffix lookup.
    ".blade.php": "Blade Template",
    ".jsp": "JavaServer Pages",
    ".asp": "ASP",
    ".aspx": "ASP.NET",
    ".cshtml": "Razor (C#)",
    ".razor": "Razor",
    ".wasm": "WebAssembly",

    # --- Data and configuration ---
    ".json": "JSON",
    ".jsonl": "JSON Lines",
    ".json5": "JSON5",
    ".geojson": "GeoJSON",
    ".xml": "XML",
    ".xsl": "XSLT",
    ".xslt": "XSLT",
    ".xsd": "XML Schema",
    ".dtd": "Document Type Definition",
    ".yaml": "YAML",
    ".yml": "YAML",
    ".toml": "TOML",
    ".ini": "INI Config",
    ".cfg": "Config",
    ".conf": "Config",
    ".env": "Environment File",
    ".properties": "Properties File",
    ".csv": "CSV",
    ".tsv": "TSV",
    ".parquet": "Apache Parquet",
    ".avro": "Apache Avro",
    ".orc": "Apache ORC",
    ".feather": "Feather (Arrow)",
    ".arrow": "Apache Arrow",
    ".msgpack": "MessagePack",
    ".bson": "BSON",
    ".cbor": "CBOR",
    ".protobuf": "Protocol Buffer",
    ".proto": "Protocol Buffer Schema",
    ".thrift": "Apache Thrift",
    ".hcl": "HCL (Terraform)",
    ".tf": "Terraform",
    ".tfvars": "Terraform Variables",
    ".pkl": "Pickle",
    ".pickle": "Pickle",
    ".npy": "NumPy Array",
    ".npz": "NumPy Archive",
    ".h5": "HDF5",
    ".hdf5": "HDF5",
    ".nc": "NetCDF",
    ".sqlite": "SQLite Database",
    ".sqlite3": "SQLite Database",
    ".db": "Database",
    ".dbf": "dBASE",
    ".mdb": "Access Database",
    ".accdb": "Access Database",
    ".lmdb": "LMDB",
    ".leveldb": "LevelDB",

    # --- Documents ---
    ".md": "Markdown",
    ".mdx": "MDX",
    ".txt": "Plain Text",
    ".text": "Plain Text",
    ".log": "Log File",
    ".rst": "reStructuredText",
    ".adoc": "AsciiDoc",
    ".org": "Org Mode",
    ".tex": "LaTeX",
    ".latex": "LaTeX",
    ".bib": "BibTeX",
    ".rtf": "Rich Text Format",
    ".doc": "Microsoft Word",
    ".docx": "Microsoft Word (OOXML)",
    ".odt": "OpenDocument Text",
    ".pdf": "PDF",
    ".xls": "Microsoft Excel",
    ".xlsx": "Microsoft Excel (OOXML)",
    ".xlsm": "Excel Macro-Enabled",
    ".xlsb": "Excel Binary",
    ".ods": "OpenDocument Spreadsheet",
    ".ppt": "Microsoft PowerPoint",
    ".pptx": "Microsoft PowerPoint (OOXML)",
    ".odp": "OpenDocument Presentation",
    ".pages": "Apple Pages",
    ".numbers": "Apple Numbers",
    ".key": "Apple Keynote",
    ".epub": "EPUB",
    ".mobi": "Mobipocket",
    ".djvu": "DjVu",
    ".chm": "Compiled HTML Help",
    ".man": "Man Page",
    ".info": "GNU Info",

    # --- Notebooks, data science and ML models ---
    ".ipynb": "Jupyter Notebook",
    ".qmd": "Quarto Document",
    ".dvc": "DVC File",
    ".onnx": "ONNX Model",
    ".pt": "PyTorch Model",
    ".pth": "PyTorch Checkpoint",
    ".safetensors": "SafeTensors Model",
    ".gguf": "GGUF Model",
    ".bin": "Binary File",

    # --- Images and design ---
    ".png": "PNG Image",
    ".jpg": "JPEG Image",
    ".jpeg": "JPEG Image",
    ".gif": "GIF Image",
    ".bmp": "Bitmap Image",
    ".tiff": "TIFF Image",
    ".tif": "TIFF Image",
    ".webp": "WebP Image",
    ".avif": "AVIF Image",
    ".heic": "HEIC Image",
    ".heif": "HEIF Image",
    ".svg": "SVG Image",
    ".ico": "Icon",
    ".icns": "Apple Icon",
    ".cur": "Cursor",
    ".psd": "Photoshop",
    ".ai": "Adobe Illustrator",
    ".sketch": "Sketch",
    ".xd": "Adobe XD",
    ".eps": "EPS Vector",
    ".raw": "RAW Image",
    ".cr2": "Canon RAW",
    ".nef": "Nikon RAW",
    ".arw": "Sony RAW",
    ".dng": "DNG RAW",
    ".exr": "OpenEXR",
    ".hdr": "HDR Image",
    ".pbm": "PBM Image",
    ".pgm": "PGM Image",
    ".ppm": "PPM Image",
    ".pcx": "PCX Image",
    ".tga": "TGA Image",

    # --- Audio ---
    ".mp3": "MP3 Audio",
    ".wav": "WAV Audio",
    ".flac": "FLAC Audio",
    ".aac": "AAC Audio",
    ".ogg": "Ogg Audio",
    ".wma": "WMA Audio",
    ".m4a": "M4A Audio",
    ".opus": "Opus Audio",
    ".aiff": "AIFF Audio",
    ".mid": "MIDI",
    ".midi": "MIDI",
    ".amr": "AMR Audio",
    ".ape": "APE Audio",
    ".alac": "ALAC Audio",

    # --- Video ---
    ".mp4": "MP4 Video",
    ".mkv": "MKV Video",
    ".avi": "AVI Video",
    ".mov": "QuickTime Video",
    ".wmv": "WMV Video",
    ".flv": "Flash Video",
    ".webm": "WebM Video",
    ".m4v": "M4V Video",
    ".mpg": "MPEG Video",
    ".mpeg": "MPEG Video",
    ".3gp": "3GP Video",
    ".ogv": "Ogg Video",
    ".vob": "DVD Video",
    ".rm": "RealMedia",

    # --- Fonts ---
    ".ttf": "TrueType Font",
    ".otf": "OpenType Font",
    ".woff": "WOFF Font",
    ".woff2": "WOFF2 Font",
    ".eot": "EOT Font",
    ".fon": "Bitmap Font",

    # --- Archives, packages and binaries ---
    ".zip": "ZIP Archive",
    ".gz": "Gzip Archive",
    ".gzip": "Gzip Archive",
    ".bz2": "Bzip2 Archive",
    ".xz": "XZ Archive",
    ".lz": "Lzip Archive",
    ".lzma": "LZMA Archive",
    ".zst": "Zstandard Archive",
    ".tar": "Tar Archive",
    ".tgz": "Tar+Gzip Archive",
    ".tbz2": "Tar+Bzip2 Archive",
    ".txz": "Tar+XZ Archive",
    ".rar": "RAR Archive",
    ".7z": "7-Zip Archive",
    ".cab": "Cabinet Archive",
    ".iso": "ISO Disk Image",
    ".dmg": "macOS Disk Image",
    ".img": "Disk Image",
    ".vhdx": "Virtual Hard Disk (VHDX)",
    ".vmdk": "VMware Disk",
    ".qcow2": "QEMU Disk",
    ".deb": "Debian Package",
    ".rpm": "RPM Package",
    ".apk": "Android Package",
    ".ipa": "iOS App",
    ".snap": "Snap Package",
    ".flatpak": "Flatpak Package",
    ".appimage": "AppImage",
    ".msi": "MSI Installer",
    ".exe": "Windows Executable",
    ".dll": "DLL Library",
    ".so": "Shared Object",
    ".dylib": "macOS Dynamic Library",
    ".a": "Static Library",
    ".lib": "Library",
    ".o": "Object File",
    ".obj": "Object File / Wavefront OBJ",
    ".sys": "System File",
    ".drv": "Driver",

    # --- DevOps ---
    ".dockerfile": "Dockerfile",
    ".vagrantfile": "Vagrantfile",
    ".jenkinsfile": "Jenkinsfile",

    # --- Trading platforms (MetaTrader / TradingView) ---
    ".mq5": "MQL5 Source",
    ".mq4": "MQL4 Source",
    ".mqh": "MQL Header",
    ".ex5": "MQL5 Compiled",
    ".ex4": "MQL4 Compiled",
    ".set": "MT Parameter Set",
    ".pine": "Pine Script",
    ".tpl": "MT Template",
    ".chr": "MT Chart",
    ".mql": "MQL Source",

    # --- Build, project and system files ---
    ".makefile": "Makefile",
    ".cmake": "CMake",
    ".gradle": "Gradle",
    ".sbt": "SBT Build",
    ".gemspec": "Ruby Gem Spec",
    ".podspec": "CocoaPods Spec",
    ".cabal": "Cabal (Haskell)",
    ".csproj": "C# Project",
    ".fsproj": "F# Project",
    ".vbproj": "VB.NET Project",
    ".sln": "Visual Studio Solution",
    ".xcodeproj": "Xcode Project",
    ".pbxproj": "Xcode Project (PBX)",
    ".xcworkspace": "Xcode Workspace",
    ".plist": "Property List",
    ".lnk": "Windows Shortcut",
    ".url": "URL Shortcut",
    ".desktop": "Desktop Entry",
    ".reg": "Windows Registry",
    ".service": "Systemd Service",
    ".timer": "Systemd Timer",
    ".socket": "Systemd Socket",
    ".lock": "Lock File",
    ".patch": "Patch File",
    ".diff": "Diff File",
    ".map": "Source Map",
    ".whl": "Python Wheel",
    ".egg": "Python Egg",
    ".gem": "Ruby Gem",
    ".nupkg": "NuGet Package",
    ".crate": "Rust Crate",

    # --- SQL ---
    ".sql": "SQL",
    ".mysql": "MySQL",
    ".pgsql": "PostgreSQL",
    ".plsql": "PL/SQL",

    # --- API schemas ---
    ".graphql": "GraphQL",
    ".gql": "GraphQL",
    ".swagger": "Swagger",
    ".openapi": "OpenAPI Spec",

    # --- 3D, CAD and GIS ---
    ".stl": "STL 3D Model",
    ".fbx": "FBX 3D Model",
    ".gltf": "glTF 3D Model",
    ".glb": "glTF Binary",
    ".blend": "Blender File",
    ".3ds": "3DS Model",
    ".dae": "Collada",
    ".usd": "USD Scene",
    ".usda": "USD ASCII",
    ".usdc": "USD Binary",
    ".step": "STEP CAD",
    ".stp": "STEP CAD",
    ".iges": "IGES CAD",
    ".igs": "IGES CAD",
    ".dwg": "AutoCAD Drawing",
    ".dxf": "AutoCAD Exchange",
    ".shp": "Shapefile",
    ".kml": "KML",
    ".kmz": "KMZ",
    ".gpx": "GPX Track",

    # --- Certificates and keys ---
    ".pem": "PEM Certificate",
    ".crt": "Certificate",
    ".cer": "Certificate",
    ".csr": "Certificate Request",
    ".p12": "PKCS#12",
    ".pfx": "PKCS#12",
    ".asc": "PGP/GPG",
    ".gpg": "GPG Encrypted",
    ".sig": "Signature",
    ".pub": "Public Key",

    # --- Dotfiles ---
    # os.path.splitext() reports NO extension for names that consist only of
    # a leading dot plus text (e.g. ".gitignore"), so these entries are
    # reachable only when the lookup is done with the full file name.
    ".gitignore": "Git Ignore",
    ".gitattributes": "Git Attributes",
    ".gitmodules": "Git Submodules",
    ".editorconfig": "EditorConfig",
    ".prettierrc": "Prettier Config",
    ".eslintrc": "ESLint Config",
    ".stylelintrc": "Stylelint Config",
    ".babelrc": "Babel Config",
    ".swcrc": "SWC Config",
    ".nvmrc": "NVM Config",
    ".npmrc": "NPM Config",
    ".yarnrc": "Yarn Config",

    # --- Miscellaneous ---
    ".mw5": "MQL5 Variant",
    ".webmanifest": "Web App Manifest",
}
|
| |
|
| |
|
| |
|
| |
|
| |
|
# Names skipped by default while walking the tree.  Entries are matched
# against a single path component (directory or file name); an entry that
# starts with "*" is treated as a suffix pattern.
DEFAULT_IGNORE = {
    # Version control
    ".git",
    # Python virtual environments
    ".venv",
    "venv",
    "env",
    # Python caches and build residue
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    ".tox",
    ".eggs",
    "*.egg-info",
    ".ipynb_checkpoints",
    # JavaScript dependencies
    "node_modules",
    # Operating-system clutter
    ".DS_Store",
    "Thumbs.db",
    "desktop.ini",
}
|
| |
|
| |
|
def should_ignore(name: str, ignore_set: set) -> bool:
    """Return True if *name* matches any entry in *ignore_set*.

    An entry matches when it is

    * exactly equal to *name*,
    * a ``*suffix`` pattern whose suffix *name* ends with (the historical
      rule, kept unchanged so existing patterns behave as before), or
    * a shell-style glob (``*``, ``?``, ``[seq]``) that matches *name*
      case-sensitively, e.g. ``build-*`` or ``*.py[co]``.

    Args:
        name: A single path component (file or directory name), not a path.
        ignore_set: Literal names and/or patterns to test against.

    Returns:
        True if *name* should be skipped, otherwise False.
    """
    # Fast path: literal names are by far the most common entries.
    if name in ignore_set:
        return True

    return any(
        # Historical suffix rule first, then general glob matching.
        # fnmatchcase (not fnmatch) keeps matching case-sensitive on every
        # platform, consistent with the exact-name test above.
        (pattern.startswith("*") and name.endswith(pattern[1:]))
        or fnmatch.fnmatchcase(name, pattern)
        for pattern in ignore_set
    )
|
| |
|
| |
|
def get_label(ext: str) -> str:
    """Translate an extension such as ``".py"`` into a display label.

    Known extensions are resolved through ``EXT_LABELS``.  Anything else
    falls back to the extension itself, upper-cased and without its leading
    dot(s) (``".xyz"`` -> ``"XYZ"``).
    """
    try:
        return EXT_LABELS[ext]
    except KeyError:
        # Unknown extension: derive a label from the extension text.
        return ext.upper().lstrip(".")
|
| |
|
| |
|
def scan_files(root_path: str, ignore_set: set, exclude_exts: set) -> dict:
    """Walk *root_path* and collect per-extension statistics.

    Args:
        root_path: Directory to walk recursively.
        ignore_set: Names/patterns passed to ``should_ignore``; matching
            directories are not descended into and matching files are
            skipped.
        exclude_exts: Lower-case extensions WITHOUT the leading dot whose
            files are left out of the result.

    Returns:
        A plain dict keyed by the lower-cased extension (e.g. ``".py"``)
        with values ``{"count": int, "size_bytes": int, "label": str}``.
        Files without an extension are collected under the key ``"(none)"``
        with the label ``"(no extension)"``.  Dotfiles that have an entry in
        ``EXT_LABELS`` (e.g. ``.gitignore``) are keyed by their full
        lower-cased name.
    """
    stats: dict = defaultdict(lambda: {"count": 0, "size_bytes": 0, "label": ""})

    for dirpath, dirnames, filenames in os.walk(root_path):
        # Prune in place so os.walk never descends into ignored directories.
        dirnames[:] = [d for d in dirnames if not should_ignore(d, ignore_set)]

        for fname in filenames:
            if should_ignore(fname, ignore_set):
                continue

            ext = os.path.splitext(fname)[1].lower()

            # os.path.splitext() treats a leading dot as part of the stem, so
            # ".gitignore" yields an empty extension.  Resolve dotfiles that
            # have a label by their full name; unknown dotfiles stay in the
            # "(no extension)" bucket.
            if not ext and fname.startswith("."):
                dotfile_key = fname.lower()
                if dotfile_key in EXT_LABELS:
                    ext = dotfile_key

            if ext and ext.lstrip(".") in exclude_exts:
                continue

            if ext:
                label = f"{get_label(ext)} ({ext})"
            else:
                ext = "(none)"
                label = "(no extension)"

            try:
                size = os.path.getsize(os.path.join(dirpath, fname))
            except OSError:
                # Unreadable or vanished file (PermissionError is an OSError
                # subclass): still count it, with a size of zero.
                size = 0

            entry = stats[ext]
            entry["count"] += 1
            entry["size_bytes"] += size
            entry["label"] = label

    return dict(stats)
|
| |
|
| |
|
def format_size_kb(size_bytes: int) -> str:
    """Render a byte count as kilobytes for display.

    A size of zero is shown as a bare ``"0"``.  Every other size is divided
    by 1024 and formatted with thousands separators and exactly one decimal
    place (``1536`` -> ``"1.5"``, ``1263616`` -> ``"1,234.0"``).
    """
    kilobytes = size_bytes / 1024
    return "0" if kilobytes == 0 else format(kilobytes, ",.1f")
|
| |
|
| |
|
def build_table(stats: dict, sort_by: str) -> list[str]:
    """Build a Markdown table from *stats*.

    Args:
        stats: Mapping of extension key to
            ``{"count": int, "size_bytes": int, "label": str}``.
        sort_by: ``"count"`` (most files first), ``"size"`` (largest first)
            or ``"ext"`` (alphabetical by extension key).  Any other value
            sorts alphabetically by label.  Ties are broken by label.

    Returns:
        The table as a list of lines: header, separator, one row per
        extension and a final bold "Total" row.
    """
    rows = [
        {
            "label": data["label"],
            "count": data["count"],
            "size_bytes": data["size_bytes"],
            "ext_key": ext,
        }
        for ext, data in stats.items()
    ]
    total_count = sum(row["count"] for row in rows)
    total_bytes = sum(row["size_bytes"] for row in rows)

    if sort_by == "count":
        rows.sort(key=lambda r: (-r["count"], r["label"].lower()))
    elif sort_by == "size":
        rows.sort(key=lambda r: (-r["size_bytes"], r["label"].lower()))
    elif sort_by == "ext":
        # Order by the extension itself, not by the display label (which
        # starts with the language name and would sort ".c" under "C").
        rows.sort(key=lambda r: (r["ext_key"], r["label"].lower()))
    else:
        rows.sort(key=lambda r: r["label"].lower())

    lines = [
        "| File Type | Count | Size (KB) |",
        "| :--- | :--- | :--- |",
    ]
    for row in rows:
        # A literal "|" (possible in an unknown extension taken from a file
        # name) would split the cell, so escape it for Markdown.
        label = row["label"].replace("|", "\\|")
        size_kb = format_size_kb(row["size_bytes"])
        lines.append(f"| {label} | {row['count']} | {size_kb} |")

    lines.append(
        f"| **Total** | **{total_count}** | **{format_size_kb(total_bytes)}** |"
    )
    return lines
|
| |
|
| |
|
def main() -> None:
    """Command-line entry point.

    Parses the arguments, scans the requested directory, writes the Markdown
    report and prints a short summary.  Exits through ``parser.error`` when
    the path to scan is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Audit repository tech-stack (file types, counts, sizes)."
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=".",
        help="Root directory to scan (default: current directory).",
    )
    parser.add_argument(
        "--output",
        "-o",
        default=None,
        help="Output Markdown file path (default: TECHSTACK.md in scanned dir).",
    )
    parser.add_argument(
        "--ignore",
        nargs="*",
        default=None,
        help="Extra directory/file names to ignore.",
    )
    parser.add_argument(
        "--no-default-ignore",
        action="store_true",
        help="Disable the built-in ignore list.",
    )
    parser.add_argument(
        "--exclude-ext",
        nargs="*",
        default=None,
        help="File extensions to exclude from output (without dot, e.g. png mp4 git).",
    )
    parser.add_argument(
        "--sort",
        choices=["count", "size", "ext"],
        default="count",
        help="Sort order for the table (default: count descending).",
    )
    args = parser.parse_args()

    root = os.path.abspath(args.path)
    # os.walk() silently yields nothing for a missing path or a regular file,
    # which would otherwise produce an empty report; fail loudly instead.
    if not os.path.isdir(root):
        parser.error(f"not a directory: {root}")
    # basename() is empty for a filesystem root; fall back to the full path.
    root_name = os.path.basename(root) or root

    ignore_set: set = set() if args.no_default_ignore else set(DEFAULT_IGNORE)
    if args.ignore:
        ignore_set.update(args.ignore)

    out_path = args.output or os.path.join(root, "TECHSTACK.md")

    # Keep a report from a previous run out of the statistics, but only when
    # it lives inside the scanned tree.  Ignoring its bare name otherwise
    # would hide unrelated same-named files (e.g. --output /tmp/README.md).
    try:
        report_in_tree = (
            os.path.commonpath([root, os.path.abspath(out_path)]) == root
        )
    except ValueError:
        # Raised when the two paths are on different drives (Windows).
        report_in_tree = False
    if report_in_tree:
        ignore_set.add(os.path.basename(out_path))

    exclude_exts: set = set()
    if args.exclude_ext:
        exclude_exts = {e.lower().lstrip(".") for e in args.exclude_ext}

    print(f"Scanning: {root}")
    print(f"Ignoring: {', '.join(sorted(ignore_set))}")
    if exclude_exts:
        print(f"Excluding extensions: {', '.join(sorted(exclude_exts))}")

    stats = scan_files(root, ignore_set, exclude_exts)
    table_lines = build_table(stats, sort_by=args.sort)

    # The trailing "\n" on the first two entries yields a blank line after
    # the heading and after the intro once everything is joined below.
    md_lines = [
        "## Techstack\n",
        f"Audit of **{root_name}** project files (excluding environment and cache):\n",
    ]
    md_lines.extend(table_lines)
    md_lines.append("")  # ensures the file ends with a newline

    content = "\n".join(md_lines)

    with open(out_path, "w", encoding="utf-8") as fh:
        fh.write(content)

    total = sum(d["count"] for d in stats.values())
    # Plain ASCII separator: the previous character was a mis-encoded dash.
    print(f"Done - {total} files across {len(stats)} types written to {out_path}")


if __name__ == "__main__":
    main()
|
| |
|