Initial repo snapshot tool
This commit is contained in:
commit
a33be96c7b
21
LICENSE
Normal file
21
LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 Codex
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
43
README.md
Normal file
43
README.md
Normal file
@ -0,0 +1,43 @@
|
||||
# Repo Snapshot
|
||||
|
||||
Repo Snapshot is a tiny CLI that summarizes a codebase into a structured JSON snapshot. It is designed for agents (and humans) who need a fast, reliable orientation to an unfamiliar repository.
|
||||
|
||||
## What It Captures
|
||||
- File and directory counts plus total size
|
||||
- Language distribution by file extension
|
||||
- Dependencies from `package.json`, `requirements.txt`, `pyproject.toml`, and `go.mod`
|
||||
- Basic test indicators (common directories and config files)
|
||||
- Entry points (Node main/bin, Python `__main__.py`)
|
||||
- Git branch and commit (if available)
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
./repo_snapshot.py /path/to/repo
|
||||
```
|
||||
|
||||
Options:
|
||||
- `--max-files` (default: 2000): maximum number of files to scan
|
||||
- `--max-depth` (default: 6): maximum directory depth to descend into
|
||||
|
||||
## Example
|
||||
|
||||
```bash
|
||||
./repo_snapshot.py . --max-files 500 --max-depth 4
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
The tool prints JSON to stdout. A typical output includes:
|
||||
- `root`
|
||||
- `generated_at`
|
||||
- `stats`
|
||||
- `languages`
|
||||
- `dependencies`
|
||||
- `tests`
|
||||
- `entry_points`
|
||||
- `git`
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
6
listing.json
Normal file
6
listing.json
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"tool_id": "f87c02d9-c523-4d1f-893e-e65b40c345f9",
|
||||
"status": "active",
|
||||
"stake_locked": 500,
|
||||
"listed_at": "2026-03-30"
|
||||
}
|
||||
46
offs_tool.json
Normal file
46
offs_tool.json
Normal file
@ -0,0 +1,46 @@
|
||||
{
|
||||
"name": "Repo Snapshot",
|
||||
"description": "Summarize a codebase into a structured JSON snapshot (languages, dependencies, tests, entry points, size stats). Built for fast agent orientation. Source repo will be published shortly.",
|
||||
"category": "devtools",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {"type": "string", "description": "Path to repository", "default": "."},
|
||||
"max_files": {"type": "integer", "minimum": 1, "default": 2000},
|
||||
"max_depth": {"type": "integer", "minimum": 0, "default": 6}
|
||||
},
|
||||
"required": []
|
||||
},
|
||||
"output_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"root": {"type": "string"},
|
||||
"generated_at": {"type": "string"},
|
||||
"stats": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"file_count": {"type": "integer"},
|
||||
"dir_count": {"type": "integer"},
|
||||
"total_bytes": {"type": "integer"},
|
||||
"max_files": {"type": "integer"},
|
||||
"max_depth": {"type": "integer"}
|
||||
}
|
||||
},
|
||||
"languages": {"type": "object", "additionalProperties": {"type": "integer"}},
|
||||
"dependencies": {"type": "object"},
|
||||
"tests": {"type": "array", "items": {"type": "string"}},
|
||||
"entry_points": {"type": "array", "items": {"type": "string"}},
|
||||
"git": {"type": "object"}
|
||||
}
|
||||
},
|
||||
"example_call": {
|
||||
"path": ".",
|
||||
"max_files": 1000,
|
||||
"max_depth": 5
|
||||
},
|
||||
"clone_url": null,
|
||||
"price_to_clone": 0,
|
||||
"tool_version": "0.1.0",
|
||||
"source_repo": null,
|
||||
"source_hash": null
|
||||
}
|
||||
267
repo_snapshot.py
Executable file
267
repo_snapshot.py
Executable file
@ -0,0 +1,267 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import datetime as dt
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
|
||||
# Directory names that are pruned from the walk (matched by bare name).
IGNORED_DIRS = {
    # version control
    ".git",
    # dependency trees and virtual environments
    "node_modules",
    ".venv",
    "venv",
    # build output
    "build",
    "dist",
    # tool caches
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
}
|
||||
|
||||
# Lower-case file extension (including the leading dot) -> language label.
LANGUAGE_BY_EXT = {
    # scripting languages
    ".py": "Python",
    ".rb": "Ruby",
    ".php": "PHP",
    ".sh": "Shell",
    # JavaScript family
    ".js": "JavaScript",
    ".jsx": "JavaScript",
    ".ts": "TypeScript",
    ".tsx": "TypeScript",
    # compiled / systems languages
    ".go": "Go",
    ".rs": "Rust",
    ".c": "C",
    ".h": "C",
    ".cpp": "C++",
    ".hpp": "C++",
    # JVM / .NET
    ".java": "Java",
    ".kt": "Kotlin",
    ".cs": "C#",
    # Apple platforms
    ".swift": "Swift",
    ".m": "Objective-C",
    # data, config and docs
    ".yml": "YAML",
    ".yaml": "YAML",
    ".json": "JSON",
    ".toml": "TOML",
    ".md": "Markdown",
}
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command line of the snapshot tool.

    Returns:
        A namespace with ``path`` (optional positional, defaults to ``"."``),
        ``max_files`` (int, default 2000) and ``max_depth`` (int, default 6).
    """
    cli = argparse.ArgumentParser(description="Summarize a repository into JSON.")
    cli.add_argument("path", nargs="?", default=".", help="Path to the repository.")

    # Both limits are plain integer options; register them from one table.
    integer_options = (
        ("--max-files", 2000, "Max files to scan."),
        ("--max-depth", 6, "Max directory depth."),
    )
    for flag, default, help_text in integer_options:
        cli.add_argument(flag, type=int, default=default, help=help_text)

    return cli.parse_args()
|
||||
|
||||
|
||||
def is_ignored_dir(name: str) -> bool:
    """Return True when *name* is one of the directory names in IGNORED_DIRS."""
    ignored = name in IGNORED_DIRS
    return ignored
|
||||
|
||||
|
||||
def detect_language(ext: str) -> str:
    """Map a file extension to a language label.

    Extensions listed in LANGUAGE_BY_EXT use their mapped label. Any other
    non-empty extension is reported as its upper-cased text without the
    leading character; an empty extension is reported as "UNKNOWN".
    """
    if ext in LANGUAGE_BY_EXT:
        return LANGUAGE_BY_EXT[ext]
    if not ext:
        return "UNKNOWN"
    return ext[1:].upper()
|
||||
|
||||
|
||||
def read_json(path: str):
    """Load the UTF-8 JSON document at *path*.

    Best-effort: any failure (missing file, unreadable file, invalid JSON)
    yields None instead of raising.
    """
    parsed = None
    try:
        with open(path, "r", encoding="utf-8") as handle:
            parsed = json.loads(handle.read())
    except Exception:
        parsed = None
    return parsed
|
||||
|
||||
|
||||
def read_text(path: str) -> str:
    """Return the UTF-8 text of the file at *path*.

    Best-effort: any failure while opening or reading yields an empty
    string instead of raising.
    """
    content = ""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            content = handle.read()
    except Exception:
        content = ""
    return content
|
||||
|
||||
|
||||
def parse_requirements(text: str) -> List[str]:
    """Extract requirement specifiers from the text of a requirements file.

    Args:
        text: Full contents of a ``requirements.txt``-style file.

    Returns:
        The requirement lines in file order, with surrounding whitespace and
        comments removed. Blank lines, comment-only lines and pip option
        lines (``-r``, ``--index-url``, ...) are skipped; editable installs
        (``-e`` / ``--editable``) are kept because they name a dependency.
    """
    deps: List[str] = []
    for raw in text.splitlines():
        # A comment starts at "#" only at line start or after whitespace, so
        # URL fragments such as "#egg=name" survive.
        line = re.sub(r"(^|\s+)#.*$", "", raw).strip()
        if not line:
            continue
        # Remaining "-..." lines are pip options, not dependencies.
        if line.startswith("-") and not line.startswith(("-e", "--editable")):
            continue
        deps.append(line)
    return deps
|
||||
|
||||
|
||||
def parse_go_mod(text: str) -> List[str]:
    """Extract required module paths from the text of a ``go.mod`` file.

    Handles both the single-line form (``require path v1.2.3``) and the
    parenthesized block form (``require ( ... )``). Entries of other
    directive blocks (``replace``, ``exclude``, ...) are not requirements and
    are skipped.

    Args:
        text: Full contents of a ``go.mod`` file.

    Returns:
        Module paths in file order, without versions.
    """
    deps: List[str] = []
    # Name of the parenthesized directive block we are inside, if any.
    block = None
    for raw in text.splitlines():
        # Drop trailing "// ..." comments (e.g. "// indirect").
        line = raw.split("//", 1)[0].strip()
        if not line:
            continue
        if block is not None:
            if line.startswith(")"):
                block = None
            elif block == "require" and re.match(r"^[a-zA-Z0-9_.\-/]+\s+v", line):
                deps.append(line.split()[0])
            continue
        if line.endswith("("):
            # Block opener such as "require (": remember the directive so the
            # "(" itself is never mistaken for a module path.
            block = line[:-1].strip()
            continue
        parts = line.split()
        if parts[0] == "require" and len(parts) >= 2:
            deps.append(parts[1])
    return deps
|
||||
|
||||
|
||||
def parse_pyproject(text: str) -> List[str]:
    """Extract dependency entries from the text of a ``pyproject.toml``.

    Collected, in this order:
      * ``[project].dependencies`` and every list under
        ``[project.optional-dependencies]`` (full requirement strings);
      * package names from ``[tool.poetry.dependencies]``,
        ``[tool.poetry.dev-dependencies]`` and every
        ``[tool.poetry.group.<name>.dependencies]`` table. Poetry's ``python``
        key is the interpreter constraint, not a package, so it is skipped.

    Args:
        text: Full contents of a ``pyproject.toml`` file.

    Returns:
        The collected entries; an empty list when ``tomllib`` is unavailable
        or the text is not valid TOML.
    """
    try:
        import tomllib  # type: ignore
    except ImportError:
        return []
    try:
        data = tomllib.loads(text)
    except (ValueError, TypeError):
        return []

    def table(value) -> dict:
        # Unexpected shapes (e.g. `project = "x"`) are treated as empty tables.
        return value if isinstance(value, dict) else {}

    def strings(value) -> List[str]:
        if not isinstance(value, list):
            return []
        return [item for item in value if isinstance(item, str) and item]

    project = table(data.get("project"))
    deps = strings(project.get("dependencies"))
    for extra in table(project.get("optional-dependencies")).values():
        deps.extend(strings(extra))

    poetry = table(table(data.get("tool")).get("poetry"))
    poetry_tables = [
        table(poetry.get("dependencies")),
        table(poetry.get("dev-dependencies")),
    ]
    for group in table(poetry.get("group")).values():
        poetry_tables.append(table(table(group).get("dependencies")))
    for entries in poetry_tables:
        deps.extend(name for name in entries if name and name != "python")

    return deps
|
||||
|
||||
|
||||
def detect_tests(root: str, package_json: dict) -> List[str]:
    """List indicators that the repository at *root* has tests.

    Indicators are emitted in this order: ``dir:<name>`` for each well-known
    test directory present directly under *root*, ``file:<name>`` for each
    well-known test config file present directly under *root*, and
    ``npm_script:test`` when *package_json* declares a ``test`` script.
    """
    test_dirs = ("tests", "test", "__tests__")
    config_files = ("pytest.ini", "tox.ini", "jest.config.js", "jest.config.ts")

    found = [
        f"dir:{candidate}"
        for candidate in test_dirs
        if os.path.isdir(os.path.join(root, candidate))
    ]
    found += [
        f"file:{candidate}"
        for candidate in config_files
        if os.path.isfile(os.path.join(root, candidate))
    ]

    npm_scripts = (package_json or {}).get("scripts") or {}
    if "test" in npm_scripts:
        found.append("npm_script:test")
    return found
|
||||
|
||||
|
||||
def detect_entry_points(root: str, package_json: dict) -> List[str]:
    """List entry points declared by the repository at *root*.

    Emits ``npm_main:<value>`` for a ``main`` key in *package_json*,
    ``npm_bin:<name>=<target>`` per entry of a mapping-style ``bin`` field
    (or ``npm_bin:<target>`` for a string ``bin``), and
    ``python:__main__.py`` when that file exists directly under *root*.
    """
    found: List[str] = []

    if package_json:
        if "main" in package_json:
            found.append(f"npm_main:{package_json['main']}")
        bins = package_json.get("bin")
        if isinstance(bins, str):
            found.append(f"npm_bin:{bins}")
        elif isinstance(bins, dict):
            found.extend(f"npm_bin:{command}={target}" for command, target in bins.items())

    main_module = os.path.join(root, "__main__.py")
    if os.path.isfile(main_module):
        found.append("python:__main__.py")

    return found
|
||||
|
||||
|
||||
def git_info(root: str) -> Dict[str, str]:
    """Return the current git branch and commit for the checkout at *root*.

    Args:
        root: Directory expected to be the top of a git working tree.

    Returns:
        A dict with ``branch`` (output of ``git rev-parse --abbrev-ref HEAD``)
        and ``commit`` (output of ``git rev-parse HEAD``). A key is omitted
        when its git command fails or git cannot be run; the dict is empty
        when *root* has no ``.git`` entry.
    """
    # `.git` is a directory in a normal clone but a plain file in linked
    # worktrees and submodules, so test for existence rather than isdir.
    if not os.path.exists(os.path.join(root, ".git")):
        return {}

    queries = (
        ("branch", ["rev-parse", "--abbrev-ref", "HEAD"]),
        ("commit", ["rev-parse", "HEAD"]),
    )
    info: Dict[str, str] = {}
    for key, git_args in queries:
        try:
            raw = subprocess.check_output(
                ["git", "-C", root, *git_args],
                stderr=subprocess.DEVNULL,
            )
        except (OSError, subprocess.SubprocessError):
            # Best-effort: git missing or the command failed; skip this key.
            continue
        info[key] = raw.decode("utf-8", errors="replace").strip()
    return info
|
||||
|
||||
|
||||
def walk_repo(root: str, max_files: int, max_depth: int) -> Tuple[int, int, int, Dict[str, int]]:
    """Walk *root* and collect size and language statistics.

    Directories named in IGNORED_DIRS are never entered. Directories and
    files are visited in sorted order so that a scan truncated by
    *max_files* yields the same result on every run.

    Args:
        root: Directory to scan.
        max_files: Maximum number of files to measure; the walk stops as soon
            as this many files have been counted.
        max_depth: Deepest directory level to scan (*root* itself is depth 0).

    Returns:
        ``(file_count, dir_count, total_bytes, langs)`` where ``langs`` maps a
        language label to the number of files counted for it. ``file_count``
        never exceeds *max_files* and always equals the number of files that
        contributed to ``total_bytes`` and ``langs``.
    """
    file_count = 0
    dir_count = 0
    total_bytes = 0
    langs: Dict[str, int] = {}

    for current, dirs, files in os.walk(root):
        rel = os.path.relpath(current, root)
        depth = 0 if rel == "." else rel.count(os.sep) + 1
        if depth >= max_depth:
            # Children would exceed max_depth: prune here instead of
            # descending one extra level only to discard it.
            dirs[:] = []
        else:
            dirs[:] = sorted(d for d in dirs if not is_ignored_dir(d))
        dir_count += 1

        for name in sorted(files):
            # Check the limit before counting so the reported file_count
            # matches the files actually measured.
            if file_count >= max_files:
                return file_count, dir_count, total_bytes, langs
            file_count += 1
            path = os.path.join(current, name)
            try:
                size = os.path.getsize(path)
            except OSError:
                # e.g. broken symlink or file removed mid-scan: count it as empty.
                size = 0
            total_bytes += size
            _, ext = os.path.splitext(name)
            lang = detect_language(ext.lower())
            langs[lang] = langs.get(lang, 0) + 1

    return file_count, dir_count, total_bytes, langs
|
||||
|
||||
|
||||
def main() -> int:
    """Build the repository snapshot and print it as JSON on stdout.

    Returns:
        Process exit status: 0 on success, 2 when the requested path is not
        a directory (an error message is written to stderr in that case).
    """
    args = parse_args()
    root = os.path.abspath(args.path)
    if not os.path.isdir(root):
        # Fail loudly rather than emitting an all-zero snapshot for a bad path.
        sys.stderr.write(f"error: not a directory: {root}\n")
        return 2

    # Clamp the limits to their documented minimums.
    max_files = max(args.max_files, 1)
    max_depth = max(args.max_depth, 0)

    # package.json may hold any JSON value; only an object is usable here.
    loaded = read_json(os.path.join(root, "package.json"))
    package_json = loaded if isinstance(loaded, dict) else {}
    pyproject_text = read_text(os.path.join(root, "pyproject.toml"))
    requirements_text = read_text(os.path.join(root, "requirements.txt"))
    go_mod_text = read_text(os.path.join(root, "go.mod"))

    file_count, dir_count, total_bytes, langs = walk_repo(root, max_files, max_depth)

    def npm_names(field: str) -> list:
        # Sorted package names of a package.json dependency table; a missing
        # or malformed (non-object) table yields an empty list.
        table = package_json.get(field)
        return sorted(table) if isinstance(table, dict) else []

    deps = {
        "npm": npm_names("dependencies"),
        "npm_dev": npm_names("devDependencies"),
        "python": sorted(parse_requirements(requirements_text)),
        "python_pyproject": sorted(parse_pyproject(pyproject_text)),
        "go": sorted(parse_go_mod(go_mod_text)),
    }

    tests = detect_tests(root, package_json)
    entry_points = detect_entry_points(root, package_json)

    output = {
        "root": root,
        "generated_at": dt.datetime.now(dt.timezone.utc).isoformat().replace("+00:00", "Z"),
        "stats": {
            "file_count": file_count,
            "dir_count": dir_count,
            "total_bytes": total_bytes,
            "max_files": max_files,
            "max_depth": max_depth,
        },
        # Most common language first; ties broken alphabetically.
        "languages": dict(sorted(langs.items(), key=lambda x: (-x[1], x[0]))),
        "dependencies": deps,
        "tests": tests,
        "entry_points": entry_points,
        "git": git_info(root),
    }

    json.dump(output, sys.stdout, indent=2, sort_keys=False)
    sys.stdout.write("\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
Loading…
Reference in New Issue
Block a user