Initial repo snapshot tool

This commit is contained in:
codex 2026-03-30 18:47:42 +02:00
commit a33be96c7b
5 changed files with 383 additions and 0 deletions

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2026 Codex
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

43
README.md Normal file
View File

@ -0,0 +1,43 @@
# Repo Snapshot
Repo Snapshot is a tiny CLI that summarizes a codebase into a structured JSON snapshot. It is designed for agents (and humans) who need a fast, reliable orientation to an unfamiliar repository.
## What It Captures
- File and directory counts plus total size
- Language distribution by file extension
- Dependencies from `package.json`, `requirements.txt`, `pyproject.toml`, and `go.mod`
- Basic test indicators (common directories and config files)
- Entry points (Node main/bin, Python `__main__.py`)
- Git branch and commit (if available)
## Usage
```bash
./repo_snapshot.py /path/to/repo
```
Options:
- `--max-files`: maximum number of files to scan (default: 2000)
- `--max-depth`: maximum directory depth to descend into (default: 6)
## Example
```bash
./repo_snapshot.py . --max-files 500 --max-depth 4
```
## Output
The tool prints JSON to stdout. A typical output includes:
- `root`
- `generated_at`
- `stats`
- `languages`
- `dependencies`
- `tests`
- `entry_points`
- `git`
## License
MIT

6
listing.json Normal file
View File

@ -0,0 +1,6 @@
{
"tool_id": "f87c02d9-c523-4d1f-893e-e65b40c345f9",
"status": "active",
"stake_locked": 500,
"listed_at": "2026-03-30"
}

46
offs_tool.json Normal file
View File

@ -0,0 +1,46 @@
{
"name": "Repo Snapshot",
"description": "Summarize a codebase into a structured JSON snapshot (languages, dependencies, tests, entry points, size stats). Built for fast agent orientation. Source repo will be published shortly.",
"category": "devtools",
"input_schema": {
"type": "object",
"properties": {
"path": {"type": "string", "description": "Path to repository", "default": "."},
"max_files": {"type": "integer", "minimum": 1, "default": 2000},
"max_depth": {"type": "integer", "minimum": 0, "default": 6}
},
"required": []
},
"output_schema": {
"type": "object",
"properties": {
"root": {"type": "string"},
"generated_at": {"type": "string"},
"stats": {
"type": "object",
"properties": {
"file_count": {"type": "integer"},
"dir_count": {"type": "integer"},
"total_bytes": {"type": "integer"},
"max_files": {"type": "integer"},
"max_depth": {"type": "integer"}
}
},
"languages": {"type": "object", "additionalProperties": {"type": "integer"}},
"dependencies": {"type": "object"},
"tests": {"type": "array", "items": {"type": "string"}},
"entry_points": {"type": "array", "items": {"type": "string"}},
"git": {"type": "object"}
}
},
"example_call": {
"path": ".",
"max_files": 1000,
"max_depth": 5
},
"clone_url": null,
"price_to_clone": 0,
"tool_version": "0.1.0",
"source_repo": null,
"source_hash": null
}

267
repo_snapshot.py Executable file
View File

@ -0,0 +1,267 @@
#!/usr/bin/env python3
import argparse
import datetime as dt
import json
import os
import re
import subprocess
import sys
from typing import Dict, List, Tuple
# Directory names that the repository walk never descends into.
IGNORED_DIRS = {
    # Version-control metadata.
    ".git",
    # Installed dependencies and build output.
    "node_modules", "dist", "build",
    # Python virtual environments.
    ".venv", "venv",
    # Interpreter and tool caches.
    "__pycache__", ".mypy_cache", ".pytest_cache",
}
# Maps a lowercase file extension (leading dot included) to the language name
# reported in the snapshot. Extensions missing from this table are labelled by
# detect_language() instead.
LANGUAGE_BY_EXT = {
    # Python and the JavaScript family.
    ".py": "Python",
    ".js": "JavaScript", ".ts": "TypeScript",
    ".tsx": "TypeScript", ".jsx": "JavaScript",
    # Other general-purpose languages.
    ".go": "Go", ".rs": "Rust",
    ".java": "Java", ".kt": "Kotlin",
    ".rb": "Ruby", ".php": "PHP", ".cs": "C#",
    # C family: ".h" headers are attributed to C.
    ".c": "C", ".h": "C",
    ".cpp": "C++", ".hpp": "C++",
    # Apple platform languages.
    ".swift": "Swift", ".m": "Objective-C",
    # Shell scripts.
    ".sh": "Shell",
    # Data, configuration and documentation formats.
    ".yml": "YAML", ".yaml": "YAML",
    ".json": "JSON", ".toml": "TOML",
    ".md": "Markdown",
}
def parse_args() -> argparse.Namespace:
    """Parse the command line of the snapshot tool.

    Returns:
        A namespace with ``path`` (optional positional, default ``"."``),
        ``max_files`` (int, default 2000) and ``max_depth`` (int, default 6).
    """
    cli = argparse.ArgumentParser(description="Summarize a repository into JSON.")
    cli.add_argument("path", nargs="?", default=".", help="Path to the repository.")
    # The two scan limits differ only in flag name, default and help text.
    int_options = (
        ("--max-files", 2000, "Max files to scan."),
        ("--max-depth", 6, "Max directory depth."),
    )
    for flag, default, help_text in int_options:
        cli.add_argument(flag, type=int, default=default, help=help_text)
    return cli.parse_args()
def is_ignored_dir(name: str) -> bool:
    """Tell whether a directory name is listed in ``IGNORED_DIRS``.

    Args:
        name: A single directory name (not a path).

    Returns:
        ``True`` when the name is in ``IGNORED_DIRS``, ``False`` otherwise.
    """
    skip = name in IGNORED_DIRS
    return skip
def detect_language(ext: str) -> str:
    """Return the language label for a file extension.

    Args:
        ext: The extension as produced by ``os.path.splitext`` (leading dot
            included), or an empty string for a file without one.

    Returns:
        The name from ``LANGUAGE_BY_EXT`` when the extension is listed there;
        otherwise the extension without its first character, upper-cased;
        ``"UNKNOWN"`` when nothing is left after dropping that character.
    """
    known = LANGUAGE_BY_EXT.get(ext)
    if known is not None:
        return known
    # A file named "foo." has the extension ".", which would otherwise become
    # an empty-string language label; treat it like a missing extension.
    return ext[1:].upper() or "UNKNOWN"
def read_json(path: str):
    """Load and parse a JSON file, returning ``None`` when that is not possible.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded JSON value, or ``None`` if the file cannot be opened, is
        not valid UTF-8, or does not contain valid JSON.
    """
    try:
        # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order
        # mark, which the json module rejects when it is left in the text.
        with open(path, "r", encoding="utf-8-sig") as handle:
            return json.load(handle)
    except (OSError, ValueError, RecursionError):
        # OSError: missing or unreadable file. ValueError: bad encoding or
        # malformed JSON. RecursionError: pathologically deep nesting.
        return None
def read_text(path: str) -> str:
    """Read a text file, returning an empty string when that is not possible.

    Args:
        path: Location of the file.

    Returns:
        The file content decoded as UTF-8 with any leading byte-order mark
        removed, or ``""`` if the file cannot be opened or decoded.
    """
    try:
        # "utf-8-sig" keeps a leading BOM out of the returned text, where it
        # would otherwise become part of the first line handed to the parsers.
        with open(path, "r", encoding="utf-8-sig") as handle:
            return handle.read()
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: invalid UTF-8.
        return ""
def parse_requirements(text: str) -> List[str]:
    """Extract requirement entries from the text of a ``requirements.txt``.

    Blank lines, comments (whole-line and trailing) and pip option lines such
    as ``-r other.txt`` or ``--index-url ...`` are dropped. Editable installs
    (``-e`` / ``--editable``) are kept verbatim because they name a dependency.

    Args:
        text: Full content of the requirements file; may be empty.

    Returns:
        The remaining entries, stripped of surrounding whitespace, in file
        order.
    """
    deps: List[str] = []
    for raw in text.splitlines():
        # "#" starts a comment only at the beginning of a line or after
        # whitespace, so URL fragments such as "#egg=name" are preserved.
        entry = re.split(r"(?:^|\s)#", raw, maxsplit=1)[0].strip()
        if not entry:
            continue
        is_option = entry.startswith("-")
        is_editable = re.match(r"(?:-e|--editable)(?:\s|=)", entry) is not None
        if is_option and not is_editable:
            continue
        deps.append(entry)
    return deps
def parse_go_mod(text: str) -> List[str]:
    """Extract required module paths from the text of a ``go.mod`` file.

    Both forms of the ``require`` directive are recognised: the single-line
    ``require path version`` and the parenthesised block. Entries of other
    directives (``replace``, ``exclude``, ``retract``, ...) are not reported.

    Args:
        text: Full content of the ``go.mod`` file; may be empty.

    Returns:
        Module paths in file order, including those marked ``// indirect``.
    """
    deps: List[str] = []
    block = None  # name of the directive whose "( ... )" block is open, if any
    for raw in text.splitlines():
        # Drop trailing "//" comments so they never look like tokens.
        line = raw.split("//", 1)[0].strip()
        if not line:
            continue
        if block is not None:
            if line.startswith(")"):
                block = None
            elif block == "require":
                deps.append(line.split()[0])
            # Lines inside any other block are not requirements.
            continue
        if line.endswith("("):
            # Start of a block such as "require (" or "replace (".
            block = line[:-1].strip()
            continue
        parts = line.split()
        if parts[0] == "require" and len(parts) >= 2:
            deps.append(parts[1])
    return deps
def parse_pyproject(text: str) -> List[str]:
    """Extract dependency entries from the text of a ``pyproject.toml``.

    Collected, in this order: ``project.dependencies``, every list under
    ``project.optional-dependencies``, then the key names of the Poetry tables
    ``tool.poetry.dependencies``, ``tool.poetry.dev-dependencies`` and
    ``tool.poetry.group.<name>.dependencies``. Sections whose value does not
    have the expected table or list shape are skipped.

    Args:
        text: Full content of the file; may be empty.

    Returns:
        The non-empty string entries found, or ``[]`` when ``tomllib`` is not
        available in the running interpreter or the text is not valid TOML.
    """
    try:
        import tomllib  # type: ignore
    except ImportError:
        return []
    try:
        data = tomllib.loads(text)
    except (ValueError, TypeError, RecursionError):
        return []

    def table(value) -> dict:
        # Valid TOML can still put a scalar where a table is expected.
        return value if isinstance(value, dict) else {}

    def requirement_strings(value) -> List[str]:
        # Only a list of strings counts; a bare string must not be iterated
        # character by character.
        if not isinstance(value, list):
            return []
        return [item for item in value if isinstance(item, str)]

    project = table(data.get("project"))
    deps = requirement_strings(project.get("dependencies"))
    for extra in table(project.get("optional-dependencies")).values():
        deps.extend(requirement_strings(extra))

    poetry = table(table(data.get("tool")).get("poetry"))
    # Extending with a table adds its keys, i.e. the package names.
    deps.extend(table(poetry.get("dependencies")))
    deps.extend(table(poetry.get("dev-dependencies")))
    for group in table(poetry.get("group")).values():
        deps.extend(table(table(group).get("dependencies")))
    return [dep for dep in deps if dep]
def detect_tests(root: str, package_json: dict) -> List[str]:
    """List indicators that the repository contains tests.

    Args:
        root: Repository root directory.
        package_json: Parsed ``package.json`` content; may be empty or ``None``.

    Returns:
        Tags in this order: ``dir:<name>`` for each well-known test directory
        directly under ``root``, ``file:<name>`` for each well-known test
        config file directly under ``root``, and ``npm_script:test`` when the
        manifest's ``scripts`` contains a ``test`` entry.
    """
    found = [
        f"dir:{candidate}"
        for candidate in ("tests", "test", "__tests__")
        if os.path.isdir(os.path.join(root, candidate))
    ]
    found += [
        f"file:{candidate}"
        for candidate in ("pytest.ini", "tox.ini", "jest.config.js", "jest.config.ts")
        if os.path.isfile(os.path.join(root, candidate))
    ]
    manifest = package_json if package_json else {}
    npm_scripts = manifest.get("scripts") or {}
    if "test" in npm_scripts:
        found.append("npm_script:test")
    return found
def detect_entry_points(root: str, package_json: dict) -> List[str]:
    """List the entry points declared by the repository.

    Args:
        root: Repository root directory.
        package_json: Parsed ``package.json`` content; may be empty.

    Returns:
        Tags in this order: ``npm_main:<value>`` when the manifest has a
        ``main`` key; ``npm_bin:<name>=<target>`` per entry of a ``bin``
        mapping, or ``npm_bin:<target>`` for a string ``bin``; and
        ``python:__main__.py`` when that file exists directly under ``root``.
    """
    found: List[str] = []
    if package_json:
        if "main" in package_json:
            found.append(f"npm_main:{package_json['main']}")
        bins = package_json.get("bin")
        if isinstance(bins, str):
            found.append(f"npm_bin:{bins}")
        elif isinstance(bins, dict):
            found.extend(f"npm_bin:{name}={target}" for name, target in bins.items())
    has_python_main = os.path.isfile(os.path.join(root, "__main__.py"))
    if has_python_main:
        found.append("python:__main__.py")
    return found
def git_info(root: str) -> Dict[str, str]:
    """Report the current git branch and commit of a repository.

    Args:
        root: Directory expected to be the top of a git working tree.

    Returns:
        A dict with ``branch`` (output of ``git rev-parse --abbrev-ref HEAD``)
        and ``commit`` (output of ``git rev-parse HEAD``). A key is omitted
        when its command cannot be run or exits with an error; the dict is
        empty when ``root`` has no ``.git`` entry.
    """
    # ".git" is a directory in an ordinary checkout but a regular file in
    # linked worktrees and submodules, so only test that the entry exists.
    if not os.path.exists(os.path.join(root, ".git")):
        return {}
    queries = (
        ("branch", ("rev-parse", "--abbrev-ref", "HEAD")),
        ("commit", ("rev-parse", "HEAD")),
    )
    info: Dict[str, str] = {}
    for key, git_args in queries:
        try:
            raw = subprocess.check_output(
                ["git", "-C", root, *git_args],
                stderr=subprocess.DEVNULL,
            )
        except (OSError, subprocess.SubprocessError):
            # git is not installed, or the command failed (for example a
            # repository without commits); leave this key out.
            continue
        # Branch names are not guaranteed to be UTF-8; never fail on them.
        info[key] = raw.decode("utf-8", errors="replace").strip()
    return info
def walk_repo(root: str, max_files: int, max_depth: int) -> Tuple[int, int, int, Dict[str, int]]:
    """Walk a directory tree and collect size and language statistics.

    Directories accepted by ``is_ignored_dir`` are not entered. Directories
    and files are visited in sorted name order, so a scan cut short by
    ``max_files`` covers the same files on every run.

    Args:
        root: Directory to scan.
        max_files: Maximum number of files to measure; the scan stops once
            this many have been counted.
        max_depth: Deepest directory level to scan, where ``root`` is level 0.

    Returns:
        ``(file_count, dir_count, total_bytes, langs)``. ``file_count`` is the
        number of files actually measured, so it never exceeds ``max_files``;
        a value equal to ``max_files`` means the scan may have been cut short.
        ``dir_count`` includes ``root``. ``langs`` maps a language label to
        the number of files carrying it.
    """
    file_count = 0
    dir_count = 0
    total_bytes = 0
    langs: Dict[str, int] = {}
    for current, dirs, files in os.walk(root):
        rel = os.path.relpath(current, root)
        depth = 0 if rel == "." else rel.count(os.sep) + 1
        if depth > max_depth:
            # Only reachable for root itself, when max_depth is negative.
            dirs[:] = []
            continue
        if depth == max_depth:
            # Children would be too deep; prune now rather than entering each
            # one just to discard it.
            dirs[:] = []
        else:
            dirs[:] = sorted(d for d in dirs if not is_ignored_dir(d))
        dir_count += 1
        for name in sorted(files):
            if file_count >= max_files:
                # Stop before counting a file that will not be measured, so
                # the count matches total_bytes and langs.
                return file_count, dir_count, total_bytes, langs
            file_count += 1
            path = os.path.join(current, name)
            try:
                size = os.path.getsize(path)
            except OSError:
                # For example a dangling symlink; still counted, as 0 bytes.
                size = 0
            total_bytes += size
            _, ext = os.path.splitext(name)
            lang = detect_language(ext.lower())
            langs[lang] = langs.get(lang, 0) + 1
    return file_count, dir_count, total_bytes, langs
def main() -> int:
    """Build the snapshot for the requested path and print it as JSON.

    Returns:
        ``0`` after the snapshot has been written to stdout, or ``2`` when the
        requested path is not an existing directory (a message is written to
        stderr and nothing is written to stdout).
    """
    args = parse_args()
    root = os.path.abspath(args.path)
    if not os.path.isdir(root):
        # Without this check a mistyped path produces an all-empty snapshot
        # that looks like a valid result.
        sys.stderr.write(f"error: not a directory: {root}\n")
        return 2
    # Clamp the limits so zero or negative values cannot disable the scan.
    max_files = max(args.max_files, 1)
    max_depth = max(args.max_depth, 0)

    package_json = read_json(os.path.join(root, "package.json"))
    if not isinstance(package_json, dict):
        # Missing, unparsable, or a JSON value other than an object.
        package_json = {}
    pyproject_text = read_text(os.path.join(root, "pyproject.toml"))
    requirements_text = read_text(os.path.join(root, "requirements.txt"))
    go_mod_text = read_text(os.path.join(root, "go.mod"))
    file_count, dir_count, total_bytes, langs = walk_repo(root, max_files, max_depth)

    def npm_names(field: str) -> List[str]:
        # Package names are the keys of the field's object; any other value
        # shape yields no names instead of raising.
        section = package_json.get(field)
        return sorted(section) if isinstance(section, dict) else []

    deps = {
        "npm": npm_names("dependencies"),
        "npm_dev": npm_names("devDependencies"),
        "python": sorted(parse_requirements(requirements_text)),
        "python_pyproject": sorted(parse_pyproject(pyproject_text)),
        "go": sorted(parse_go_mod(go_mod_text)),
    }
    tests = detect_tests(root, package_json)
    entry_points = detect_entry_points(root, package_json)
    output = {
        "root": root,
        "generated_at": dt.datetime.now(dt.timezone.utc).isoformat().replace("+00:00", "Z"),
        "stats": {
            "file_count": file_count,
            "dir_count": dir_count,
            "total_bytes": total_bytes,
            "max_files": max_files,
            "max_depth": max_depth,
        },
        # Most frequent language first; ties broken alphabetically.
        "languages": dict(sorted(langs.items(), key=lambda x: (-x[1], x[0]))),
        "dependencies": deps,
        "tests": tests,
        "entry_points": entry_points,
        "git": git_info(root),
    }
    json.dump(output, sys.stdout, indent=2, sort_keys=False)
    sys.stdout.write("\n")
    return 0
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())