Initial repo snapshot tool
This commit is contained in:
commit
a33be96c7b
21
LICENSE
Normal file
21
LICENSE
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 Codex
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
43
README.md
Normal file
43
README.md
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# Repo Snapshot
|
||||||
|
|
||||||
|
Repo Snapshot is a tiny CLI that summarizes a codebase into a structured JSON snapshot. It is designed for agents (and humans) who need a fast, reliable orientation to an unfamiliar repository.
|
||||||
|
|
||||||
|
## What It Captures
|
||||||
|
- File and directory counts plus total size
|
||||||
|
- Language distribution by file extension
|
||||||
|
- Dependencies from `package.json`, `requirements.txt`, `pyproject.toml`, and `go.mod`
|
||||||
|
- Basic test indicators (common directories and config files)
|
||||||
|
- Entry points (Node main/bin, Python `__main__.py`)
|
||||||
|
- Git branch and commit (if available)
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./repo_snapshot.py /path/to/repo
|
||||||
|
```
|
||||||
|
|
||||||
|
Options:
|
||||||
|
- `--max-files` (default: 2000): maximum number of files to scan
|
||||||
|
- `--max-depth` (default: 6): maximum directory depth to descend into
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./repo_snapshot.py . --max-files 500 --max-depth 4
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
|
||||||
|
The tool prints JSON to stdout. A typical output includes:
|
||||||
|
- `root`
|
||||||
|
- `generated_at`
|
||||||
|
- `stats`
|
||||||
|
- `languages`
|
||||||
|
- `dependencies`
|
||||||
|
- `tests`
|
||||||
|
- `entry_points`
|
||||||
|
- `git`
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT
|
||||||
6
listing.json
Normal file
6
listing.json
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"tool_id": "f87c02d9-c523-4d1f-893e-e65b40c345f9",
|
||||||
|
"status": "active",
|
||||||
|
"stake_locked": 500,
|
||||||
|
"listed_at": "2026-03-30"
|
||||||
|
}
|
||||||
46
offs_tool.json
Normal file
46
offs_tool.json
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
{
|
||||||
|
"name": "Repo Snapshot",
|
||||||
|
"description": "Summarize a codebase into a structured JSON snapshot (languages, dependencies, tests, entry points, size stats). Built for fast agent orientation. Source repo will be published shortly.",
|
||||||
|
"category": "devtools",
|
||||||
|
"input_schema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"path": {"type": "string", "description": "Path to repository", "default": "."},
|
||||||
|
"max_files": {"type": "integer", "minimum": 1, "default": 2000},
|
||||||
|
"max_depth": {"type": "integer", "minimum": 0, "default": 6}
|
||||||
|
},
|
||||||
|
"required": []
|
||||||
|
},
|
||||||
|
"output_schema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"root": {"type": "string"},
|
||||||
|
"generated_at": {"type": "string"},
|
||||||
|
"stats": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"file_count": {"type": "integer"},
|
||||||
|
"dir_count": {"type": "integer"},
|
||||||
|
"total_bytes": {"type": "integer"},
|
||||||
|
"max_files": {"type": "integer"},
|
||||||
|
"max_depth": {"type": "integer"}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"languages": {"type": "object", "additionalProperties": {"type": "integer"}},
|
||||||
|
"dependencies": {"type": "object"},
|
||||||
|
"tests": {"type": "array", "items": {"type": "string"}},
|
||||||
|
"entry_points": {"type": "array", "items": {"type": "string"}},
|
||||||
|
"git": {"type": "object"}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"example_call": {
|
||||||
|
"path": ".",
|
||||||
|
"max_files": 1000,
|
||||||
|
"max_depth": 5
|
||||||
|
},
|
||||||
|
"clone_url": null,
|
||||||
|
"price_to_clone": 0,
|
||||||
|
"tool_version": "0.1.0",
|
||||||
|
"source_repo": null,
|
||||||
|
"source_hash": null
|
||||||
|
}
|
||||||
267
repo_snapshot.py
Executable file
267
repo_snapshot.py
Executable file
@ -0,0 +1,267 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
import datetime as dt
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from typing import Dict, List, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
# Directory names that the repository walk skips entirely.
IGNORED_DIRS = {
    ".git", "node_modules", "dist", "build", ".venv", "venv",
    "__pycache__", ".mypy_cache", ".pytest_cache",
}

# Maps a lower-case file extension (including the leading dot) to the
# language label reported in the snapshot.
LANGUAGE_BY_EXT = {
    ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript",
    ".tsx": "TypeScript", ".jsx": "JavaScript", ".go": "Go",
    ".rs": "Rust", ".java": "Java", ".kt": "Kotlin",
    ".rb": "Ruby", ".php": "PHP", ".cs": "C#",
    ".c": "C", ".h": "C", ".cpp": "C++",
    ".hpp": "C++", ".swift": "Swift", ".m": "Objective-C",
    ".sh": "Shell", ".yml": "YAML", ".yaml": "YAML",
    ".json": "JSON", ".toml": "TOML", ".md": "Markdown",
}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the snapshot tool.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, so existing ``parse_args()``
            callers behave exactly as before.

    Returns:
        A namespace with ``path`` (default ``"."``), ``max_files``
        (default 2000) and ``max_depth`` (default 6).
    """
    parser = argparse.ArgumentParser(description="Summarize a repository into JSON.")
    parser.add_argument("path", nargs="?", default=".", help="Path to the repository.")
    parser.add_argument("--max-files", type=int, default=2000, help="Max files to scan.")
    parser.add_argument("--max-depth", type=int, default=6, help="Max directory depth.")
    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
def is_ignored_dir(name: str) -> bool:
    """Return True when *name* is one of the names listed in IGNORED_DIRS."""
    skipped = name in IGNORED_DIRS
    return skipped
|
||||||
|
|
||||||
|
|
||||||
|
def detect_language(ext: str) -> str:
    """Map a file extension to a language label.

    Args:
        ext: Extension including its leading dot (for example ``".py"``),
            or an empty string for files without one.

    Returns:
        The label from LANGUAGE_BY_EXT when the extension is listed there;
        otherwise the upper-cased extension without its dot, or ``"UNKNOWN"``
        when nothing is left after removing the dot.
    """
    known = LANGUAGE_BY_EXT.get(ext)
    if known is not None:
        return known
    # A name such as "notes." has the extension ".", which used to yield an
    # empty-string label; treat it like a file without an extension.
    return ext[1:].upper() or "UNKNOWN"
|
||||||
|
|
||||||
|
|
||||||
|
def read_json(path: str):
    """Load a JSON document from *path* on a best-effort basis.

    Args:
        path: File to read.

    Returns:
        The decoded JSON value, or ``None`` when the file cannot be opened,
        is not valid UTF-8, or does not contain valid JSON.
    """
    try:
        # "utf-8-sig" also accepts plain UTF-8 but drops a leading BOM, which
        # would otherwise make json.load reject the file.
        with open(path, "r", encoding="utf-8-sig") as f:
            return json.load(f)
    except (OSError, ValueError, RecursionError):
        # OSError: missing or unreadable file. ValueError: bad encoding or
        # malformed JSON. RecursionError: pathologically deep nesting.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def read_text(path: str) -> str:
    """Read a UTF-8 text file on a best-effort basis.

    Args:
        path: File to read.

    Returns:
        The file contents, or an empty string when the file cannot be opened
        or is not valid UTF-8.
    """
    try:
        # "utf-8-sig" also accepts plain UTF-8 but strips a leading BOM so it
        # does not end up glued to the first line of the returned text.
        with open(path, "r", encoding="utf-8-sig") as f:
            return f.read()
    except (OSError, ValueError):
        # UnicodeDecodeError is a ValueError subclass.
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
def parse_requirements(text: str) -> List[str]:
    """Extract requirement entries from the text of a requirements file.

    Blank lines, comments (whole-line or trailing) and pip option lines such
    as ``-r``, ``--index-url`` or ``--hash`` are skipped. Editable installs
    (``-e`` / ``--editable``) are kept verbatim because they name a dependency.

    Args:
        text: Contents of a requirements file; may be empty.

    Returns:
        The requirement lines in file order.
    """
    deps: List[str] = []
    for raw in text.splitlines():
        # A "#" starts a comment only at line start or after whitespace, so
        # URL fragments such as "#egg=name" are left intact.
        line = re.sub(r"(^|\s)#.*$", "", raw).strip()
        # Drop a trailing line-continuation backslash.
        if line.endswith("\\"):
            line = line[:-1].rstrip()
        if not line:
            continue
        is_option = line.startswith("-")
        is_editable = line.startswith(("-e ", "--editable ", "--editable="))
        if is_option and not is_editable:
            continue
        deps.append(line)
    return deps
|
||||||
|
|
||||||
|
|
||||||
|
def parse_go_mod(text: str) -> List[str]:
    """Extract required module paths from the text of a go.mod file.

    Both the single-line form (``require path version``) and the block form
    (``require ( ... )``) are handled. Other directives and their blocks
    (``module``, ``go``, ``replace``, ``exclude``, ``retract``, ...) are
    ignored.

    Args:
        text: Contents of a go.mod file; may be empty.

    Returns:
        Module paths in file order, without versions.
    """
    deps: List[str] = []
    in_require_block = False
    for raw in text.splitlines():
        # Strip trailing comments such as "// indirect".
        line = raw.split("//", 1)[0].strip()
        if not line:
            continue

        if in_require_block:
            if line.startswith(")"):
                in_require_block = False
            elif re.match(r"^[a-zA-Z0-9_.\-/]+\s+v", line):
                deps.append(line.split()[0])
            continue

        parts = line.split(None, 1)
        keyword = parts[0]
        rest = parts[1].strip() if len(parts) > 1 else ""
        if keyword.startswith("require("):
            # Tolerate a missing space before the opening parenthesis.
            rest = (keyword[len("require"):] + " " + rest).strip()
            keyword = "require"
        if keyword != "require":
            continue
        if rest.startswith("("):
            # Block opener: entries follow on later lines. "require ()" on a
            # single line opens and closes immediately.
            in_require_block = ")" not in rest
            continue
        fields = rest.split()
        if fields:
            deps.append(fields[0])
    return deps
|
||||||
|
|
||||||
|
|
||||||
|
def parse_pyproject(text: str) -> List[str]:
|
||||||
|
try:
|
||||||
|
import tomllib # type: ignore
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
try:
|
||||||
|
data = tomllib.loads(text)
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
deps: List[str] = []
|
||||||
|
project = data.get("project") or {}
|
||||||
|
deps.extend(project.get("dependencies") or [])
|
||||||
|
deps.extend((project.get("optional-dependencies") or {}).values())
|
||||||
|
tool = data.get("tool") or {}
|
||||||
|
poetry = tool.get("poetry") or {}
|
||||||
|
deps.extend(list((poetry.get("dependencies") or {}).keys()))
|
||||||
|
dev = poetry.get("dev-dependencies") or {}
|
||||||
|
deps.extend(list(dev.keys()))
|
||||||
|
flat: List[str] = []
|
||||||
|
for item in deps:
|
||||||
|
if isinstance(item, list):
|
||||||
|
flat.extend(item)
|
||||||
|
elif isinstance(item, str):
|
||||||
|
flat.append(item)
|
||||||
|
return [d for d in flat if d and isinstance(d, str)]
|
||||||
|
|
||||||
|
|
||||||
|
def detect_tests(root: str, package_json: dict) -> List[str]:
    """Collect simple indicators that a repository has tests.

    Args:
        root: Repository root directory.
        package_json: Parsed package.json. Anything that is not a dict
            (``None``, a list, ...) is treated as "no package.json".

    Returns:
        Indicator strings in a fixed order: ``dir:<name>`` for each of
        ``tests``, ``test``, ``__tests__`` present directly under *root*,
        ``file:<name>`` for each known test config file present, and
        ``npm_script:test`` when package.json defines a ``test`` script.
    """
    indicators = [
        f"dir:{name}"
        for name in ("tests", "test", "__tests__")
        if os.path.isdir(os.path.join(root, name))
    ]
    indicators.extend(
        f"file:{file_name}"
        for file_name in ("pytest.ini", "tox.ini", "jest.config.js", "jest.config.ts")
        if os.path.isfile(os.path.join(root, file_name))
    )
    scripts = package_json.get("scripts") if isinstance(package_json, dict) else None
    # Require a real mapping: on a string or list, `"test" in scripts` would
    # be a substring or element test and could report a script that does not
    # exist.
    if isinstance(scripts, dict) and "test" in scripts:
        indicators.append("npm_script:test")
    return indicators
|
||||||
|
|
||||||
|
|
||||||
|
def detect_entry_points(root: str, package_json: dict) -> List[str]:
    """List likely program entry points of a repository.

    Args:
        root: Repository root directory.
        package_json: Parsed package.json. Anything that is not a dict is
            ignored rather than indexed, so a malformed file cannot raise.

    Returns:
        ``npm_main:<value>`` when package.json has a ``main`` key;
        ``npm_bin:<name>=<path>`` for each entry of a ``bin`` mapping, or
        ``npm_bin:<path>`` when ``bin`` is a string; and
        ``python:__main__.py`` when that file exists directly under *root*.
    """
    entry_points: List[str] = []
    if isinstance(package_json, dict):
        if "main" in package_json:
            entry_points.append(f"npm_main:{package_json['main']}")
        bin_field = package_json.get("bin")
        if isinstance(bin_field, dict):
            entry_points.extend(
                f"npm_bin:{name}={val}" for name, val in bin_field.items()
            )
        elif isinstance(bin_field, str):
            entry_points.append(f"npm_bin:{bin_field}")
    if os.path.isfile(os.path.join(root, "__main__.py")):
        entry_points.append("python:__main__.py")
    return entry_points
|
||||||
|
|
||||||
|
|
||||||
|
def git_info(root: str) -> Dict[str, str]:
    """Report the current git branch and commit of *root*, when available.

    Args:
        root: Directory expected to be the top of a git checkout.

    Returns:
        A dict with ``branch`` and/or ``commit``. A key is omitted when its
        git command fails or git is not installed; the dict is empty when
        *root* has no ``.git`` entry.
    """
    # ".git" is a directory in an ordinary clone but a plain file in linked
    # worktrees and submodules, so test for existence rather than isdir.
    if not os.path.exists(os.path.join(root, ".git")):
        return {}

    queries = (
        ("branch", ["rev-parse", "--abbrev-ref", "HEAD"]),
        ("commit", ["rev-parse", "HEAD"]),
    )
    info: Dict[str, str] = {}
    for key, git_args in queries:
        try:
            raw = subprocess.check_output(
                ["git", "-C", root, *git_args],
                stderr=subprocess.DEVNULL,
            )
        except (OSError, subprocess.SubprocessError):
            # git missing, or the command exited non-zero (for example a
            # repository without commits): leave this key out.
            continue
        info[key] = raw.decode(errors="replace").strip()
    return info
|
||||||
|
|
||||||
|
|
||||||
|
def walk_repo(root: str, max_files: int, max_depth: int) -> Tuple[int, int, int, Dict[str, int]]:
    """Walk *root* and gather file, directory, size and language statistics.

    Directories accepted by ``is_ignored_dir`` are not descended into. The
    walk stops as soon as *max_files* files have been counted, and
    directories deeper than *max_depth* levels below *root* are not visited
    (the root itself is depth 0).

    Args:
        root: Directory to walk.
        max_files: Maximum number of files to count.
        max_depth: Maximum directory depth to visit.

    Returns:
        ``(file_count, dir_count, total_bytes, langs)`` where ``langs`` maps
        a language label to the number of files counted for it.
        ``file_count`` never exceeds *max_files* and always equals the sum
        of the ``langs`` values.
    """
    file_count = 0
    dir_count = 0
    total_bytes = 0
    langs: Dict[str, int] = {}

    for current, dirs, files in os.walk(root):
        rel = os.path.relpath(current, root)
        depth = 0 if rel == "." else rel.count(os.sep) + 1
        if depth >= max_depth:
            # Children would exceed the limit: prune here so os.walk never
            # lists directories whose contents would be thrown away.
            dirs[:] = []
        else:
            # Sorted so the traversal, and therefore which files are counted
            # before truncation, does not depend on filesystem order.
            dirs[:] = sorted(d for d in dirs if not is_ignored_dir(d))
        dir_count += 1

        for name in sorted(files):
            # Check the limit before counting so a truncated walk reports
            # max_files rather than max_files + 1.
            if file_count >= max_files:
                return file_count, dir_count, total_bytes, langs
            file_count += 1
            try:
                total_bytes += os.path.getsize(os.path.join(current, name))
            except OSError:
                # Unreadable entry (for example a dangling symlink): count
                # the file but contribute no bytes.
                pass
            lang = detect_language(os.path.splitext(name)[1].lower())
            langs[lang] = langs.get(lang, 0) + 1

    return file_count, dir_count, total_bytes, langs
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Build the repository snapshot and print it to stdout as JSON.

    Returns:
        0 on success, or 2 when the requested path is not a directory (an
        error message is written to stderr and nothing is printed to stdout).
    """
    args = parse_args()
    root = os.path.abspath(args.path)
    if not os.path.isdir(root):
        # Without this check a mistyped path yields an empty but
        # valid-looking snapshot.
        sys.stderr.write(f"error: not a directory: {root}\n")
        return 2

    # Clamp user-supplied limits to sane minimums.
    max_files = max(args.max_files, 1)
    max_depth = max(args.max_depth, 0)

    package_json = read_json(os.path.join(root, "package.json"))
    if not isinstance(package_json, dict):
        # Missing, unreadable, or valid JSON that is not an object.
        package_json = {}
    pyproject_text = read_text(os.path.join(root, "pyproject.toml"))
    requirements_text = read_text(os.path.join(root, "requirements.txt"))
    go_mod_text = read_text(os.path.join(root, "go.mod"))

    file_count, dir_count, total_bytes, langs = walk_repo(root, max_files, max_depth)

    def npm_names(field: str) -> List[str]:
        """Return the sorted package names of a package.json dependency map."""
        section = package_json.get(field)
        return sorted(section) if isinstance(section, dict) else []

    deps = {
        "npm": npm_names("dependencies"),
        "npm_dev": npm_names("devDependencies"),
        "python": sorted(parse_requirements(requirements_text)),
        "python_pyproject": sorted(parse_pyproject(pyproject_text)),
        "go": sorted(parse_go_mod(go_mod_text)),
    }

    tests = detect_tests(root, package_json)
    entry_points = detect_entry_points(root, package_json)

    output = {
        "root": root,
        "generated_at": dt.datetime.now(dt.timezone.utc).isoformat().replace("+00:00", "Z"),
        "stats": {
            "file_count": file_count,
            "dir_count": dir_count,
            "total_bytes": total_bytes,
            "max_files": max_files,
            "max_depth": max_depth,
        },
        # Most frequent language first; ties broken alphabetically.
        "languages": dict(sorted(langs.items(), key=lambda x: (-x[1], x[0]))),
        "dependencies": deps,
        "tests": tests,
        "entry_points": entry_points,
        "git": git_info(root),
    }

    json.dump(output, sys.stdout, indent=2, sort_keys=False)
    sys.stdout.write("\n")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
Loading…
Reference in New Issue
Block a user