From a33be96c7bcbd05ad1825d12b7e6df6ea23f8804 Mon Sep 17 00:00:00 2001 From: codex Date: Mon, 30 Mar 2026 18:47:42 +0200 Subject: [PATCH] Initial repo snapshot tool --- LICENSE | 21 ++++ README.md | 43 ++++++++ listing.json | 6 ++ offs_tool.json | 46 ++++++++ repo_snapshot.py | 267 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 383 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 listing.json create mode 100644 offs_tool.json create mode 100755 repo_snapshot.py diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0a8e9f5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Codex + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..0ec9e55 --- /dev/null +++ b/README.md @@ -0,0 +1,43 @@ +# Repo Snapshot + +Repo Snapshot is a tiny CLI that summarizes a codebase into a structured JSON snapshot. 
It is designed for agents (and humans) who need a fast, reliable orientation to an unfamiliar repository. + +## What It Captures +- File and directory counts plus total size +- Language distribution by file extension +- Dependencies from `package.json`, `requirements.txt`, `pyproject.toml`, and `go.mod` +- Basic test indicators (common directories and config files) +- Entry points (Node main/bin, Python `__main__.py`) +- Git branch and commit (if available) + +## Usage + +```bash +./repo_snapshot.py /path/to/repo +``` + +Options: +- `--max-files` (default: 2000) +- `--max-depth` (default: 6) + +## Example + +```bash +./repo_snapshot.py . --max-files 500 --max-depth 4 +``` + +## Output + +The tool prints JSON to stdout. A typical output includes: +- `root` +- `generated_at` +- `stats` +- `languages` +- `dependencies` +- `tests` +- `entry_points` +- `git` + +## License + +MIT diff --git a/listing.json b/listing.json new file mode 100644 index 0000000..0edba1f --- /dev/null +++ b/listing.json @@ -0,0 +1,6 @@ +{ + "tool_id": "f87c02d9-c523-4d1f-893e-e65b40c345f9", + "status": "active", + "stake_locked": 500, + "listed_at": "2026-03-30" +} diff --git a/offs_tool.json b/offs_tool.json new file mode 100644 index 0000000..98c64bd --- /dev/null +++ b/offs_tool.json @@ -0,0 +1,46 @@ +{ + "name": "Repo Snapshot", + "description": "Summarize a codebase into a structured JSON snapshot (languages, dependencies, tests, entry points, size stats). Built for fast agent orientation. 
#!/usr/bin/env python3
"""Summarize a repository into a structured JSON snapshot.

The snapshot contains file/directory counts and total size, a language
distribution keyed by file extension, dependencies read from
``package.json``, ``requirements.txt``, ``pyproject.toml`` and ``go.mod``,
basic test indicators, entry points, and the current git branch/commit.
The result is printed to stdout as JSON.
"""
import argparse
import datetime as dt
import json
import os
import re
import subprocess
import sys
from typing import Dict, List, Optional, Tuple


# Directory names that are never descended into while walking the tree.
IGNORED_DIRS = {
    ".git",
    "node_modules",
    "dist",
    "build",
    ".venv",
    "venv",
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
}

# Lower-case file extension (including the leading dot) -> language name.
LANGUAGE_BY_EXT = {
    ".py": "Python",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".tsx": "TypeScript",
    ".jsx": "JavaScript",
    ".go": "Go",
    ".rs": "Rust",
    ".java": "Java",
    ".kt": "Kotlin",
    ".rb": "Ruby",
    ".php": "PHP",
    ".cs": "C#",
    ".c": "C",
    ".h": "C",
    ".cpp": "C++",
    ".hpp": "C++",
    ".swift": "Swift",
    ".m": "Objective-C",
    ".sh": "Shell",
    ".yml": "YAML",
    ".yaml": "YAML",
    ".json": "JSON",
    ".toml": "TOML",
    ".md": "Markdown",
}

# A "#" at the start of a line or preceded by whitespace starts a comment in
# a requirements file; a "#" glued to other text (URL fragment) does not.
_REQ_COMMENT = re.compile(r"(^|\s+)#.*$")
# Backslash-newline joins a requirement with its continuation line.
_REQ_CONTINUATION = re.compile(r"\\\r?\n")
_REQ_EDITABLE_FLAGS = ("-e", "--editable")


def _as_dict(value: object) -> dict:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _string_items(value: object) -> List[str]:
    """Return the string members of *value* when it is a list, else ``[]``."""
    if not isinstance(value, list):
        return []
    return [item for item in value if isinstance(item, str)]


def parse_args() -> argparse.Namespace:
    """Parse the command line: optional ``path``, ``--max-files``, ``--max-depth``."""
    parser = argparse.ArgumentParser(description="Summarize a repository into JSON.")
    parser.add_argument("path", nargs="?", default=".", help="Path to the repository.")
    parser.add_argument("--max-files", type=int, default=2000, help="Max files to scan.")
    parser.add_argument("--max-depth", type=int, default=6, help="Max directory depth.")
    return parser.parse_args()


def is_ignored_dir(name: str) -> bool:
    """Return True when directory *name* is listed in ``IGNORED_DIRS``."""
    return name in IGNORED_DIRS


def detect_language(ext: str) -> str:
    """Map a file extension (with leading dot) to a language name.

    Known extensions come from ``LANGUAGE_BY_EXT`` (matched
    case-insensitively). Unknown extensions are reported as the upper-cased
    extension without the dot. An empty extension, or one that is only a
    dot, yields ``"UNKNOWN"``.
    """
    known = LANGUAGE_BY_EXT.get(ext.lower())
    if known is not None:
        return known
    bare = ext[1:]
    # A file named "foo." has the extension "."; never emit an empty name.
    return bare.upper() if bare else "UNKNOWN"


def read_json(path: str):
    """Return the parsed JSON document at *path*, or None if unreadable/invalid."""
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return None


def read_text(path: str) -> str:
    """Return the UTF-8 text of *path*, or ``""`` if it cannot be read."""
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except (OSError, ValueError):
        return ""


def parse_requirements(text: str) -> List[str]:
    """Extract requirement specifiers from ``requirements.txt`` content.

    Comments (full-line and inline), blank lines and option lines such as
    ``-r``/``-c``/``--index-url`` are skipped. For ``-e``/``--editable``
    lines the target is kept. Per-requirement options (``--hash=...``) are
    dropped from the specifier. Results keep file order.
    """
    deps: List[str] = []
    joined = _REQ_CONTINUATION.sub("", text)
    for raw in joined.splitlines():
        line = _REQ_COMMENT.sub("", raw).strip()
        if not line:
            continue
        if line.startswith("-"):
            flag, sep, target = line.partition(" ")
            if not sep:
                flag, _, target = line.partition("=")
            target = target.strip()
            if flag in _REQ_EDITABLE_FLAGS and target:
                deps.append(target)
            continue
        # Strip trailing per-requirement options, e.g. "pkg==1 --hash=...".
        spec = line.split(" --", 1)[0].strip()
        if spec:
            deps.append(spec)
    return deps


def parse_go_mod(text: str) -> List[str]:
    """Extract required module paths from ``go.mod`` content.

    Handles both single-line ``require path version`` directives and
    parenthesised ``require ( ... )`` blocks. Entries of other directives
    and blocks (``replace``, ``exclude``, ``retract``, ...) are ignored.
    Results keep file order.
    """
    deps: List[str] = []
    block: Optional[str] = None  # directive name of the block being read
    for raw in text.splitlines():
        line = raw.split("//", 1)[0].strip()  # drop trailing comments
        if not line:
            continue
        if block is not None:
            if line.startswith(")"):
                block = None
            elif block == "require":
                deps.append(line.split()[0])
            continue
        if line.endswith("("):
            block = line[:-1].strip()
            continue
        tokens = line.split()
        if tokens[0] == "require" and len(tokens) >= 2:
            deps.append(tokens[1])
    return deps


def parse_pyproject(text: str) -> List[str]:
    """Extract dependency strings/names from ``pyproject.toml`` content.

    Collected, in order: ``project.dependencies``, every list under
    ``project.optional-dependencies``, then the keys of Poetry's
    ``dependencies``, ``dev-dependencies`` and ``group.*.dependencies``
    tables. Poetry's ``python`` key is skipped because it is the
    interpreter constraint. Returns ``[]`` when ``tomllib`` is unavailable
    or the text is not valid TOML.
    """
    try:
        import tomllib  # type: ignore
    except ImportError:
        return []
    try:
        data = tomllib.loads(text)
    except (ValueError, TypeError):
        return []

    deps: List[str] = []
    project = _as_dict(data.get("project"))
    deps.extend(_string_items(project.get("dependencies")))
    for extra in _as_dict(project.get("optional-dependencies")).values():
        deps.extend(_string_items(extra))

    poetry = _as_dict(_as_dict(data.get("tool")).get("poetry"))
    tables = [poetry.get("dependencies"), poetry.get("dev-dependencies")]
    for group in _as_dict(poetry.get("group")).values():
        tables.append(_as_dict(group).get("dependencies"))
    for table in tables:
        deps.extend(name for name in _as_dict(table) if name.lower() != "python")

    return [d for d in deps if d]


def detect_tests(root: str, package_json: dict) -> List[str]:
    """Return test indicators found directly under *root*.

    Indicators are ``dir:<name>`` for common test directories,
    ``file:<name>`` for common test config files, and ``npm_script:test``
    when *package_json* defines a ``test`` script.
    """
    indicators = []
    for name in ["tests", "test", "__tests__"]:
        if os.path.isdir(os.path.join(root, name)):
            indicators.append(f"dir:{name}")
    for file_name in ["pytest.ini", "tox.ini", "jest.config.js", "jest.config.ts"]:
        if os.path.isfile(os.path.join(root, file_name)):
            indicators.append(f"file:{file_name}")
    scripts = _as_dict(_as_dict(package_json).get("scripts"))
    if "test" in scripts:
        indicators.append("npm_script:test")
    return indicators


def detect_entry_points(root: str, package_json: dict) -> List[str]:
    """Return entry points: npm ``main``/``bin`` and a root ``__main__.py``."""
    entry_points = []
    manifest = _as_dict(package_json)
    if "main" in manifest:
        entry_points.append(f"npm_main:{manifest['main']}")
    bin_field = manifest.get("bin")
    if isinstance(bin_field, dict):
        for name, val in bin_field.items():
            entry_points.append(f"npm_bin:{name}={val}")
    elif isinstance(bin_field, str):
        entry_points.append(f"npm_bin:{bin_field}")
    if os.path.isfile(os.path.join(root, "__main__.py")):
        entry_points.append("python:__main__.py")
    return entry_points


def _git_rev_parse(root: str, rev_args: List[str]) -> Optional[str]:
    """Run ``git -C root rev-parse <rev_args>``; return stripped output or None."""
    try:
        raw = subprocess.check_output(
            ["git", "-C", root, "rev-parse", *rev_args],
            stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.CalledProcessError):
        # git is not installed, or the revision cannot be resolved.
        return None
    return raw.decode(errors="replace").strip()


def git_info(root: str) -> Dict[str, str]:
    """Return ``{"branch": ..., "commit": ...}`` for the repository at *root*.

    Returns an empty dict when *root* has no ``.git`` entry. A key is
    omitted when the corresponding git command fails.
    """
    # ".git" is a directory in a normal clone but a file in worktrees and
    # submodules, so test for existence rather than for a directory.
    if not os.path.exists(os.path.join(root, ".git")):
        return {}
    info: Dict[str, str] = {}
    for key, rev_args in (("branch", ["--abbrev-ref", "HEAD"]), ("commit", ["HEAD"])):
        value = _git_rev_parse(root, rev_args)
        if value is not None:
            info[key] = value
    return info


def _scan_repo(
    root: str, max_files: int, max_depth: int
) -> Tuple[int, int, int, Dict[str, int], bool]:
    """Walk *root* and return (files, dirs, bytes, languages, truncated).

    ``truncated`` is True when at least one file was left out because the
    *max_files* budget was exhausted.
    """
    file_count = 0
    dir_count = 0
    total_bytes = 0
    langs: Dict[str, int] = {}

    for current, dirs, files in os.walk(root):
        rel = os.path.relpath(current, root)
        depth = 0 if rel == "." else rel.count(os.sep) + 1
        if depth > max_depth:
            dirs[:] = []
            continue
        if depth == max_depth:
            # Children would exceed the depth limit; do not even list them.
            dirs[:] = []
        else:
            # Sorted so that a truncated scan is reproducible.
            dirs[:] = sorted(d for d in dirs if not is_ignored_dir(d))
        dir_count += 1

        for name in sorted(files):
            if file_count >= max_files:
                return file_count, dir_count, total_bytes, langs, True
            file_count += 1
            try:
                size = os.path.getsize(os.path.join(current, name))
            except OSError:
                size = 0  # e.g. broken symlink or permission error
            total_bytes += size
            _, ext = os.path.splitext(name)
            lang = detect_language(ext.lower())
            langs[lang] = langs.get(lang, 0) + 1

    return file_count, dir_count, total_bytes, langs, False


def walk_repo(root: str, max_files: int, max_depth: int) -> Tuple[int, int, int, Dict[str, int]]:
    """Walk *root* and return ``(file_count, dir_count, total_bytes, languages)``.

    Directories in ``IGNORED_DIRS`` and anything deeper than *max_depth*
    (root is depth 0) are skipped. At most *max_files* files are scanned,
    and ``file_count`` is the number of files actually scanned.
    """
    file_count, dir_count, total_bytes, langs, _ = _scan_repo(root, max_files, max_depth)
    return file_count, dir_count, total_bytes, langs


def main() -> int:
    """Build the snapshot for the requested path and print it as JSON.

    Returns 0 on success, or 2 when the path is not a directory.
    """
    args = parse_args()
    root = os.path.abspath(args.path)
    if not os.path.isdir(root):
        sys.stderr.write(f"error: not a directory: {root}\n")
        return 2
    max_files = max(args.max_files, 1)
    max_depth = max(args.max_depth, 0)

    # A package.json whose top level is not an object is treated as absent.
    package_json = _as_dict(read_json(os.path.join(root, "package.json")))
    pyproject_text = read_text(os.path.join(root, "pyproject.toml"))
    requirements_text = read_text(os.path.join(root, "requirements.txt"))
    go_mod_text = read_text(os.path.join(root, "go.mod"))

    file_count, dir_count, total_bytes, langs, truncated = _scan_repo(
        root, max_files, max_depth
    )

    deps = {
        "npm": sorted(_as_dict(package_json.get("dependencies"))),
        "npm_dev": sorted(_as_dict(package_json.get("devDependencies"))),
        "python": sorted(parse_requirements(requirements_text)),
        "python_pyproject": sorted(parse_pyproject(pyproject_text)),
        "go": sorted(parse_go_mod(go_mod_text)),
    }

    tests = detect_tests(root, package_json)
    entry_points = detect_entry_points(root, package_json)

    output = {
        "root": root,
        "generated_at": dt.datetime.now(dt.timezone.utc).isoformat().replace("+00:00", "Z"),
        "stats": {
            "file_count": file_count,
            "dir_count": dir_count,
            "total_bytes": total_bytes,
            "max_files": max_files,
            "max_depth": max_depth,
            "truncated": truncated,
        },
        # Most frequent language first; ties broken alphabetically.
        "languages": dict(sorted(langs.items(), key=lambda x: (-x[1], x[0]))),
        "dependencies": deps,
        "tests": tests,
        "entry_points": entry_points,
        "git": git_info(root),
    }

    json.dump(output, sys.stdout, indent=2, sort_keys=False)
    sys.stdout.write("\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())