268 lines
7.8 KiB
Python
Executable File
268 lines
7.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import argparse
|
|
import datetime as dt
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from typing import Dict, List, Tuple
|
|
|
|
|
|
# Directory names that the repository walk filters out and never descends into.
IGNORED_DIRS = {
    # Version-control metadata.
    ".git",
    # Installed dependencies and build output.
    "node_modules", "dist", "build",
    # Python virtual environments.
    ".venv", "venv",
    # Python bytecode and tool caches.
    "__pycache__", ".mypy_cache", ".pytest_cache",
}
|
|
|
|
# Maps a lower-case file extension (including the leading dot) to the
# language name reported in the summary.
LANGUAGE_BY_EXT = {
    # Scripting and web languages.
    ".py": "Python",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".tsx": "TypeScript",
    ".jsx": "JavaScript",
    # Compiled and JVM languages.
    ".go": "Go",
    ".rs": "Rust",
    ".java": "Java",
    ".kt": "Kotlin",
    ".rb": "Ruby",
    ".php": "PHP",
    ".cs": "C#",
    # C family; headers are attributed to the language of their suffix.
    ".c": "C",
    ".h": "C",
    ".cpp": "C++",
    ".hpp": "C++",
    ".swift": "Swift",
    ".m": "Objective-C",
    ".sh": "Shell",
    # Configuration, data and documentation formats.
    ".yml": "YAML",
    ".yaml": "YAML",
    ".json": "JSON",
    ".toml": "TOML",
    ".md": "Markdown",
}
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the repository summarizer.

    Returns:
        A namespace with ``path`` (optional positional, default ``"."``),
        ``max_files`` (int, default 2000) and ``max_depth`` (int, default 6).
    """
    cli = argparse.ArgumentParser(description="Summarize a repository into JSON.")
    # (flag, keyword settings) pairs, registered in declaration order.
    option_specs = (
        ("path", {"nargs": "?", "default": ".", "help": "Path to the repository."}),
        ("--max-files", {"type": int, "default": 2000, "help": "Max files to scan."}),
        ("--max-depth", {"type": int, "default": 6, "help": "Max directory depth."}),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
|
|
|
|
|
|
def is_ignored_dir(name: str) -> bool:
    """Return True when *name* is one of the directory names in IGNORED_DIRS."""
    ignored = name in IGNORED_DIRS
    return ignored
|
|
|
|
|
|
def detect_language(ext: str) -> str:
    """Map a file extension to a language label.

    Args:
        ext: Extension including its leading dot (e.g. ``".py"``), or ``""``.

    Returns:
        The LANGUAGE_BY_EXT entry when one exists; otherwise the extension
        without its first character, upper-cased; ``"UNKNOWN"`` for an
        empty extension.
    """
    known = LANGUAGE_BY_EXT.get(ext)
    if known is not None:
        return known
    if not ext:
        return "UNKNOWN"
    return ext[1:].upper()
|
|
|
|
|
|
def read_json(path: str):
    """Load the UTF-8 JSON document at *path*.

    Best-effort: any failure (missing file, unreadable file, invalid JSON)
    yields ``None`` instead of raising.
    """
    parsed = None
    try:
        with open(path, "r", encoding="utf-8") as handle:
            parsed = json.loads(handle.read())
    except Exception:
        parsed = None
    return parsed
|
|
|
|
|
|
def read_text(path: str) -> str:
    """Return the UTF-8 text content of *path*.

    Best-effort: any failure (missing file, permission error, undecodable
    bytes) yields an empty string instead of raising.
    """
    content = ""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            content = handle.read()
    except Exception:
        content = ""
    return content
|
|
|
|
|
|
def parse_requirements(text: str) -> List[str]:
    """Extract requirement specifiers from ``requirements.txt`` content.

    Blank lines and full-line comments are skipped, and trailing inline
    comments are removed so that ``"requests==2.31  # pinned"`` is reported
    as ``"requests==2.31"``.

    Args:
        text: Raw content of a requirements file.

    Returns:
        The remaining lines, stripped, in file order.
    """
    deps: List[str] = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        # pip only treats '#' as a comment marker when whitespace precedes
        # it, so URL fragments such as "...#egg=name" are left intact.
        line = re.split(r"\s+#", line, maxsplit=1)[0]
        deps.append(line)
    return deps
|
|
|
|
|
|
def parse_go_mod(text: str) -> List[str]:
    """Extract required module paths from ``go.mod`` content.

    Both forms of the ``require`` directive are understood: the single-line
    ``require path version`` and the parenthesized block. Other directives
    (``module``, ``go``, ``replace``, ``exclude``, ``retract``, ...) and
    their blocks are ignored, so their entries are never reported as
    requirements.

    Args:
        text: Raw content of a ``go.mod`` file.

    Returns:
        Module paths in file order (versions are dropped).
    """
    deps: List[str] = []
    in_require_block = False
    for raw_line in text.splitlines():
        # Drop trailing comments such as "// indirect" before tokenizing.
        fields = raw_line.split("//", 1)[0].split()
        if not fields:
            continue
        if in_require_block:
            if fields[0] == ")":
                in_require_block = False
            elif len(fields) >= 2:
                # Block entries look like "<module path> <version>".
                deps.append(fields[0])
            continue
        if fields[0] == "require(" or fields[:2] == ["require", "("]:
            # Opening of a block: the "(" itself is not a dependency.
            in_require_block = True
        elif fields[0] == "require" and len(fields) >= 2:
            deps.append(fields[1])
    return deps
|
|
|
|
|
|
def parse_pyproject(text: str) -> List[str]:
    """Extract dependency entries from ``pyproject.toml`` content.

    Collected, in this order:
      * ``[project].dependencies`` (requirement strings),
      * every list under ``[project.optional-dependencies]``,
      * package names from ``[tool.poetry.dependencies]``,
        ``[tool.poetry.dev-dependencies]`` and
        ``[tool.poetry.group.<name>.dependencies]``.

    Poetry's ``python`` key is skipped because it constrains the interpreter
    rather than naming a package. Tables of an unexpected type are treated
    as empty instead of raising.

    Args:
        text: Raw content of a ``pyproject.toml`` file.

    Returns:
        Non-empty string entries; ``[]`` when ``tomllib`` is unavailable
        (it is part of the standard library from Python 3.11) or the text is
        not valid TOML.
    """
    try:
        import tomllib  # type: ignore
    except ImportError:
        return []
    try:
        data = tomllib.loads(text)
    except (ValueError, RecursionError):
        # tomllib.TOMLDecodeError is a ValueError; parsing is best-effort.
        return []

    def table(parent: dict, key: str) -> dict:
        """Return parent[key] when it is a table, otherwise an empty dict."""
        value = parent.get(key)
        return value if isinstance(value, dict) else {}

    deps: list = []

    project = table(data, "project")
    declared = project.get("dependencies")
    if isinstance(declared, list):
        deps.extend(declared)
    for extra_group in table(project, "optional-dependencies").values():
        if isinstance(extra_group, list):
            deps.extend(extra_group)

    poetry = table(table(data, "tool"), "poetry")
    poetry_tables = [
        table(poetry, "dependencies"),
        table(poetry, "dev-dependencies"),
    ]
    poetry_tables.extend(
        table(group, "dependencies")
        for group in table(poetry, "group").values()
        if isinstance(group, dict)
    )
    for dep_table in poetry_tables:
        deps.extend(name for name in dep_table if name != "python")

    return [dep for dep in deps if dep and isinstance(dep, str)]
|
|
|
|
|
|
def detect_tests(root: str, package_json: dict) -> List[str]:
    """List the signs that the repository at *root* contains tests.

    Args:
        root: Repository root directory.
        package_json: Parsed ``package.json`` content; may be empty or None.

    Returns:
        Indicator strings in a fixed order: ``dir:<name>`` for each
        well-known test directory present, ``file:<name>`` for each
        test-runner config file present, then ``npm_script:test`` when
        package.json defines a ``test`` script.
    """
    dir_hits = [
        f"dir:{candidate}"
        for candidate in ("tests", "test", "__tests__")
        if os.path.isdir(os.path.join(root, candidate))
    ]
    file_hits = [
        f"file:{candidate}"
        for candidate in ("pytest.ini", "tox.ini", "jest.config.js", "jest.config.ts")
        if os.path.isfile(os.path.join(root, candidate))
    ]
    found = dir_hits + file_hits

    npm_scripts = (package_json or {}).get("scripts") or {}
    if "test" in npm_scripts:
        found.append("npm_script:test")
    return found
|
|
|
|
|
|
def detect_entry_points(root: str, package_json: dict) -> List[str]:
    """List the entry points declared by the repository at *root*.

    Args:
        root: Repository root directory.
        package_json: Parsed ``package.json`` content; may be empty or None.

    Returns:
        ``npm_main:<value>`` for a package.json ``main`` field,
        ``npm_bin:<name>=<path>`` per entry of a ``bin`` mapping (or
        ``npm_bin:<path>`` for a string ``bin``), and
        ``python:__main__.py`` when that file exists directly in *root*.
    """
    found: List[str] = []

    if package_json:
        if "main" in package_json:
            found.append(f"npm_main:{package_json['main']}")
        bin_field = package_json.get("bin")
        if isinstance(bin_field, str):
            found.append(f"npm_bin:{bin_field}")
        elif isinstance(bin_field, dict):
            found.extend(f"npm_bin:{name}={val}" for name, val in bin_field.items())

    main_module = os.path.join(root, "__main__.py")
    if os.path.isfile(main_module):
        found.append("python:__main__.py")
    return found
|
|
|
|
|
|
def _run_git(root: str, *args: str) -> str:
    """Run ``git -C <root> <args...>`` and return its stripped stdout."""
    raw = subprocess.check_output(
        ["git", "-C", root, *args],
        stderr=subprocess.DEVNULL,
    )
    return raw.decode().strip()


def git_info(root: str) -> Dict[str, str]:
    """Return the current branch and commit of the repository at *root*.

    Args:
        root: Directory expected to be the top of a Git working tree.

    Returns:
        A dict with ``"branch"`` (output of ``rev-parse --abbrev-ref HEAD``)
        and ``"commit"`` (output of ``rev-parse HEAD``). A key is omitted
        when its git command fails or git cannot be executed; ``{}`` is
        returned when *root* has no ``.git`` entry.
    """
    # ".git" is a directory in a regular clone but a plain file in linked
    # worktrees and submodules, so test for existence rather than isdir.
    if not os.path.exists(os.path.join(root, ".git")):
        return {}

    queries = (
        ("branch", ("rev-parse", "--abbrev-ref", "HEAD")),
        ("commit", ("rev-parse", "HEAD")),
    )
    info: Dict[str, str] = {}
    for key, git_args in queries:
        try:
            info[key] = _run_git(root, *git_args)
        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
            # Best-effort: git missing, non-zero exit or undecodable output
            # only drops this key from the summary.
            continue
    return info
|
|
|
|
|
|
def walk_repo(root: str, max_files: int, max_depth: int) -> Tuple[int, int, int, Dict[str, int]]:
    """Walk the tree under *root* and collect size and language statistics.

    Directories named in IGNORED_DIRS are not descended into. Directories
    and files are visited in sorted order, so a scan truncated by
    *max_files* is reproducible.

    Args:
        root: Directory to scan (depth 0).
        max_files: Maximum number of files to scan; the walk stops as soon
            as one more file would exceed it.
        max_depth: Deepest directory level to scan; direct children of
            *root* are at depth 1.

    Returns:
        ``(file_count, dir_count, total_bytes, langs)``, where *file_count*
        is the number of files actually scanned (never more than
        *max_files*), *dir_count* the number of directories scanned
        including *root*, *total_bytes* the sum of file sizes (unreadable
        files count as 0) and *langs* maps a language label to its file
        count.
    """
    file_count = 0
    dir_count = 0
    total_bytes = 0
    langs: Dict[str, int] = {}

    for current, dirs, files in os.walk(root):
        rel = os.path.relpath(current, root)
        depth = 0 if rel == "." else rel.count(os.sep) + 1
        if depth > max_depth:
            # Only reachable for the root itself when max_depth is negative.
            dirs[:] = []
            continue
        if depth == max_depth:
            # Children would be beyond the limit: do not list them at all.
            dirs[:] = []
        else:
            # In-place assignment is what prunes and orders os.walk's descent.
            dirs[:] = sorted(d for d in dirs if not is_ignored_dir(d))
        dir_count += 1

        for name in sorted(files):
            if file_count >= max_files:
                # Cap reached: report only the files that were scanned.
                return file_count, dir_count, total_bytes, langs
            file_count += 1
            try:
                size = os.path.getsize(os.path.join(current, name))
            except OSError:
                # Broken symlink or file removed during the scan.
                size = 0
            total_bytes += size
            _, ext = os.path.splitext(name)
            lang = detect_language(ext.lower())
            langs[lang] = langs.get(lang, 0) + 1

    return file_count, dir_count, total_bytes, langs
|
|
|
|
|
|
def main() -> int:
    """Summarize the repository named on the command line as JSON on stdout.

    The report contains the absolute root path, a UTC generation timestamp,
    scan statistics, per-language file counts, dependency lists read from
    ``package.json``, ``requirements.txt``, ``pyproject.toml`` and
    ``go.mod``, test indicators, entry points and Git branch/commit.

    Returns:
        0, used as the process exit status.
    """
    args = parse_args()
    root = os.path.abspath(args.path)
    # Clamp nonsensical limits instead of rejecting them.
    max_files = max(args.max_files, 1)
    max_depth = max(args.max_depth, 0)

    package_json = read_json(os.path.join(root, "package.json"))
    if not isinstance(package_json, dict):
        # Missing or unreadable file, or valid JSON that is not an object
        # (e.g. a list): treat as "no package.json" rather than crashing on
        # the .get() calls below.
        package_json = {}
    pyproject_text = read_text(os.path.join(root, "pyproject.toml"))
    requirements_text = read_text(os.path.join(root, "requirements.txt"))
    go_mod_text = read_text(os.path.join(root, "go.mod"))

    file_count, dir_count, total_bytes, langs = walk_repo(root, max_files, max_depth)

    def npm_names(field: str) -> List[str]:
        """Return the sorted package names of a package.json dependency map."""
        section = package_json.get(field)
        return sorted(section) if isinstance(section, dict) else []

    deps = {
        "npm": npm_names("dependencies"),
        "npm_dev": npm_names("devDependencies"),
        "python": sorted(parse_requirements(requirements_text)),
        "python_pyproject": sorted(parse_pyproject(pyproject_text)),
        "go": sorted(parse_go_mod(go_mod_text)),
    }

    tests = detect_tests(root, package_json)
    entry_points = detect_entry_points(root, package_json)

    output = {
        "root": root,
        "generated_at": dt.datetime.now(dt.timezone.utc).isoformat().replace("+00:00", "Z"),
        "stats": {
            "file_count": file_count,
            "dir_count": dir_count,
            "total_bytes": total_bytes,
            "max_files": max_files,
            "max_depth": max_depth,
        },
        # Most frequent language first; ties broken alphabetically.
        "languages": dict(sorted(langs.items(), key=lambda x: (-x[1], x[0]))),
        "dependencies": deps,
        "tests": tests,
        "entry_points": entry_points,
        "git": git_info(root),
    }

    json.dump(output, sys.stdout, indent=2, sort_keys=False)
    sys.stdout.write("\n")
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|