2026-02-08 04:53:31 -08:00
#!/usr/bin/env python3
"""
2026-02-09 00:21:54 -08:00
Lists the longest and shortest code files in the project, and counts duplicated function names across files. Useful for identifying potential refactoring targets and enforcing code size guidelines.
2026-02-08 04:53:31 -08:00
Threshold can be set to warn about files longer or shorter than a certain number of lines.
2026-02-09 11:34:18 -08:00
CI mode (--compare-to): Only warns about files that grew past threshold compared to a base ref.
Use --strict to exit non-zero on violations for CI gating.
2026-02-09 13:41:36 -08:00
GitHub Actions: when GITHUB_ACTIONS=true, emits ::error annotations on flagged files
and writes a Markdown job summary to $GITHUB_STEP_SUMMARY (if set).
2026-02-08 04:53:31 -08:00
"""
import os
import re
2026-02-09 11:34:18 -08:00
import sys
import subprocess
2026-02-08 04:53:31 -08:00
import argparse
from pathlib import Path
2026-02-09 11:34:18 -08:00
from typing import List , Tuple , Dict , Set , Optional
2026-02-08 04:53:31 -08:00
from collections import defaultdict
# File extensions to consider as code files.
# Entries must be exact, lower-case ``Path.suffix`` values (leading dot, no
# surrounding whitespace) because they are compared with ``suffix.lower()``.
CODE_EXTENSIONS = {
    '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',  # TypeScript/JavaScript
    '.swift',  # macOS/iOS
    '.kt', '.java',  # Android
    '.py', '.sh',  # Scripts
}
# Directories to skip.
# Matched against bare directory names while walking, so entries must be the
# exact name with no surrounding whitespace.
SKIP_DIRS = {
    'node_modules', '.git', 'dist', 'build', 'coverage',
    '__pycache__', '.turbo', 'out', '.worktrees', 'vendor',
    'Pods', 'DerivedData', '.gradle', '.idea',
    'Swabble',  # Separate Swift package
    'skills',  # Standalone skill scripts
    '.pi',  # Pi editor extensions
}
# Filename patterns to skip in short-file warnings (barrel exports, stubs).
# SKIP_SHORT_PATTERNS holds exact file names; SKIP_SHORT_SUFFIXES holds
# file-name endings (kept as a tuple so it can be passed to str.endswith).
SKIP_SHORT_PATTERNS = {
    'index.js', 'index.ts', 'postinstall.js',
}
SKIP_SHORT_SUFFIXES = ('-cli.ts',)
2026-02-09 13:18:51 -08:00
# Function names to skip in duplicate detection.
# Only list names so generic they're expected to appear independently in many modules.
# Do NOT use prefix-based skipping — it hides real duplication (e.g. formatDuration,
# stripPrefix, parseConfig are specific enough to flag).
# Entries are compared against extracted identifiers verbatim, so they must
# not carry any surrounding whitespace.
SKIP_DUPLICATE_FUNCTIONS = {
    # Lifecycle / framework plumbing
    'main', 'init', 'setup', 'teardown', 'cleanup', 'dispose', 'destroy',
    'open', 'close', 'connect', 'disconnect', 'execute', 'run', 'start', 'stop',
    'render', 'update', 'refresh', 'reset', 'clear', 'flush',
    # Too-short / too-generic identifiers
    'text', 'json', 'pad', 'mask', 'digest', 'confirm', 'intro', 'outro',
    'exists', 'send', 'receive', 'listen', 'log', 'warn', 'error', 'info',
    'help', 'version', 'config', 'configure', 'describe', 'test', 'action',
}
# File-name endings identifying test files, which are excluded from duplicate
# detection. A tuple so it can be handed straight to str.endswith.
SKIP_DUPLICATE_FILE_PATTERNS = ('.test.ts', '.test.tsx', '.spec.ts')
# Known packages in the monorepo (top-level directory names).
# Compared against the first path component of each file, so entries must be
# exact directory names.
PACKAGES = {
    'src', 'apps', 'extensions', 'packages', 'scripts', 'ui', 'test', 'docs',
}
def get_package(file_path: Path, root_dir: Path) -> str:
    """Return the monorepo package a file belongs to.

    The package is the first path component of ``file_path`` relative to
    ``root_dir`` when that component is listed in ``PACKAGES``. Anything else
    (top-level files, unknown directories, or paths that are not located
    under ``root_dir``) is reported as ``'root'``.
    """
    try:
        parts = file_path.relative_to(root_dir).parts
    except ValueError:
        # file_path is not inside root_dir.
        return 'root'
    if parts and parts[0] in PACKAGES:
        return parts[0]
    return 'root'
def count_lines(file_path: Path) -> int:
    """Count the number of lines in a file.

    The file is read as UTF-8 text with undecodable bytes dropped, and the
    count is the number of lines yielded by iterating the file (a final line
    without a trailing newline still counts). Returns 0 when the file cannot
    be opened or read, so an unreadable file never aborts a scan.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            return sum(1 for _ in f)
    except (OSError, ValueError):
        # OSError: missing file, permissions, is-a-directory, ...
        # ValueError: malformed path (e.g. embedded NUL byte).
        return 0
def find_code_files(root_dir: Path) -> List[Tuple[Path, int]]:
    """Find all code files under ``root_dir`` together with their line counts.

    Walks the tree top-down, never descending into directories named in
    ``SKIP_DIRS``, and keeps files whose lower-cased suffix is in
    ``CODE_EXTENSIONS``. Directories and files are visited in sorted order so
    the result (and therefore the report) is stable across runs and platforms.

    Returns:
        A list of ``(path, line_count)`` tuples.
    """
    files_with_counts: List[Tuple[Path, int]] = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Prune in place: os.walk only honours the skip list if the very same
        # list object is mutated.
        dirnames[:] = sorted(d for d in dirnames if d not in SKIP_DIRS)
        base = Path(dirpath)
        for filename in sorted(filenames):
            file_path = base / filename
            if file_path.suffix.lower() in CODE_EXTENSIONS:
                files_with_counts.append((file_path, count_lines(file_path)))
    return files_with_counts
# Regex patterns for TypeScript functions (exported and internal).
# Both are anchored at the start of a line (re.MULTILINE), so only top-level,
# unindented declarations are matched. Group 1 is the function name.
TS_FUNCTION_PATTERNS = [
    # export function name(...) or function name(...), optionally async
    re.compile(r'^(?:export\s+)?(?:async\s+)?function\s+(\w+)', re.MULTILINE),
    # export const name = (...) => or const name = arg =>
    re.compile(r'^(?:export\s+)?const\s+(\w+)\s*=\s*(?:\([^)]*\)|\w+)\s*=>', re.MULTILINE),
]
def extract_functions(file_path: Path) -> Set[str]:
    """Extract top-level function names from a TypeScript file.

    Only ``.ts`` / ``.tsx`` files are inspected; any other suffix, or a file
    that cannot be read, yields an empty set. The file is decoded as UTF-8
    with undecodable bytes dropped.
    """
    if file_path.suffix.lower() not in {'.ts', '.tsx'}:
        return set()
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()
    except (OSError, ValueError):
        # Unreadable file: treat as defining no functions rather than abort.
        return set()
    return extract_functions_from_content(content)
2026-02-08 04:53:31 -08:00
def _only_across_extensions(paths: List[Path], root_dir: Path) -> bool:
    """Return True when every path lives in a *different* ``extensions/<name>``.

    Extensions are independent packages — the same function name in
    extensions/telegram and extensions/discord is expected, not duplication.
    Any path outside ``extensions/`` (or outside ``root_dir``), or two paths
    inside the same extension, makes this False.
    """
    ext_dirs = set()
    for p in paths:
        try:
            parts = p.relative_to(root_dir).parts
        except ValueError:
            return False
        if len(parts) < 2 or parts[0] != 'extensions':
            return False
        ext_dirs.add(parts[1])
    return len(ext_dirs) == len(paths)


def find_duplicate_functions(files: List[Tuple[Path, int]], root_dir: Path) -> Dict[str, List[Path]]:
    """Find function names that appear in multiple files.

    Test files (``SKIP_DUPLICATE_FILE_PATTERNS``) and generic names
    (``SKIP_DUPLICATE_FUNCTIONS``) are ignored. A name is reported when it is
    defined in at least two files, unless every definition lives in a
    different extension package with no overlap in core code.

    Args:
        files: ``(path, line_count)`` tuples; the line count is unused here.
        root_dir: Repository root used to locate ``extensions/<name>``.

    Returns:
        Mapping of function name to the files that define it.
    """
    function_locations: Dict[str, List[Path]] = defaultdict(list)
    for file_path, _ in files:
        # Skip test files for duplicate detection.
        if file_path.name.endswith(SKIP_DUPLICATE_FILE_PATTERNS):
            continue
        for func in extract_functions(file_path):
            if func not in SKIP_DUPLICATE_FUNCTIONS:
                function_locations[func].append(file_path)

    return {
        name: paths
        for name, paths in function_locations.items()
        if len(paths) >= 2 and not _only_across_extensions(paths, root_dir)
    }
2026-02-08 04:53:31 -08:00
2026-02-09 11:34:18 -08:00
def validate_git_ref(root_dir: Path, ref: str) -> bool:
    """Return True if ``ref`` resolves in the git repository at ``root_dir``.

    Runs ``git rev-parse --verify <ref>`` with ``root_dir`` as the working
    directory. Returns False when git reports failure or cannot be run at
    all (git missing, directory missing, ...). This function never exits the
    process; the caller decides how to react to an invalid ref.
    """
    try:
        result = subprocess.run(
            ['git', 'rev-parse', '--verify', ref],
            capture_output=True,
            cwd=root_dir,
            encoding='utf-8',
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # OSError: git not installed / cwd missing. ValueError also covers
        # UnicodeDecodeError while decoding git's output.
        return False
    return result.returncode == 0
def get_file_content_at_ref(file_path: Path, root_dir: Path, ref: str) -> Optional[str]:
    """Return the content of ``file_path`` as it was at git ref ``ref``.

    Runs ``git show <ref>:<relative path>`` from ``root_dir``; output is
    decoded as UTF-8 with undecodable bytes dropped.

    Returns:
        The file content, or None when the file does not exist at ``ref``.
        None is also returned on any other failure (path outside
        ``root_dir``, git unavailable, unexpected git error); those cases
        additionally print a warning to stderr.
    """
    try:
        # git always addresses tree entries with forward slashes.
        git_path = file_path.relative_to(root_dir).as_posix()
        result = subprocess.run(
            ['git', 'show', f'{ref}:{git_path}'],
            capture_output=True,
            cwd=root_dir,
            encoding='utf-8',
            errors='ignore',
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        print(f"⚠️  failed to read {file_path} at {ref}: {e}", file=sys.stderr)
        return None

    if result.returncode == 0:
        return result.stdout

    stderr = result.stderr.strip()
    # "does not exist" or "exists on disk, but not in" = file missing at ref (OK)
    missing_at_ref = 'does not exist' in stderr or 'exists on disk' in stderr
    # Other errors (bad ref, git broken) = genuine failure worth surfacing.
    if stderr and not missing_at_ref:
        print(f"⚠️  git show error for {git_path}: {stderr}", file=sys.stderr)
    return None
def get_line_count_at_ref(file_path: Path, root_dir: Path, ref: str) -> Optional[int]:
    """Return the line count of ``file_path`` at git ref ``ref``.

    Returns None if the file's content at ``ref`` could not be obtained
    (typically because the file does not exist there).

    Lines are counted by newline characters only, with a trailing
    unterminated line counting as one. ``str.splitlines()`` is deliberately
    not used: it also breaks on form feeds, vertical tabs, U+2028 and similar
    characters, which would make base counts disagree with counts obtained by
    iterating the working-tree file and produce bogus size deltas.
    """
    content = get_file_content_at_ref(file_path, root_dir, ref)
    if content is None:
        return None
    unterminated_tail = 1 if content and not content.endswith('\n') else 0
    return content.count('\n') + unterminated_tail
def extract_functions_from_content(content: str) -> Set[str]:
    """Extract function names from a TypeScript source string.

    Applies every pattern in ``TS_FUNCTION_PATTERNS`` and collects capture
    group 1 (the declared name) of each match. Names are de-duplicated.
    """
    return {
        match.group(1)
        for pattern in TS_FUNCTION_PATTERNS
        for match in pattern.finditer(content)
    }
def get_changed_files(root_dir: Path, compare_ref: str) -> Set[str]:
    """Return the files changed between ``compare_ref`` and HEAD.

    Paths are relative to the repository root and use forward slashes.
    ``-z`` makes git emit raw NUL-separated paths; without it, paths with
    non-ASCII or special characters are C-quoted (e.g. ``"src/\\303\\251.ts"``)
    and would never match a real file path.

    Returns an empty set when nothing changed or when git fails; failures are
    reported on stderr so they are not mistaken for "nothing changed".
    """
    try:
        result = subprocess.run(
            ['git', 'diff', '--name-only', '-z', compare_ref, 'HEAD'],
            capture_output=True,
            cwd=root_dir,
            encoding='utf-8',
            errors='ignore',
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        print(f"⚠️  failed to run git diff against {compare_ref}: {e}", file=sys.stderr)
        return set()
    if result.returncode != 0:
        detail = result.stderr.strip()
        if detail:
            print(f"⚠️  git diff error against {compare_ref}: {detail}", file=sys.stderr)
        return set()
    return {path for path in result.stdout.split('\0') if path}
def find_duplicate_regressions(
    files: List[Tuple[Path, int]],
    root_dir: Path,
    compare_ref: str,
) -> Dict[str, List[Path]]:
    """Find duplicate function names that are new relative to ``compare_ref``.

    Only duplicates involving at least one file changed since ``compare_ref``
    are considered, to keep CI fast. For those, the base versions of the
    involved files are read from git; a name that was already defined in two
    or more of them at the base ref is not reported.

    Returns:
        Mapping of function name to the current files defining it, limited
        to duplicates that were not already duplicated at ``compare_ref``.
    """
    current_dupes = find_duplicate_functions(files, root_dir)
    if not current_dupes:
        return {}

    changed_files = get_changed_files(root_dir, compare_ref)
    if not changed_files:
        return {}  # Nothing changed, no new duplicates possible

    # Keep only duplicates that touch at least one changed file.
    relevant_dupes: Dict[str, List[Path]] = {
        name: paths
        for name, paths in current_dupes.items()
        if any(p.relative_to(root_dir).as_posix() in changed_files for p in paths)
    }
    if not relevant_dupes:
        return {}

    # Only the base versions of files involved in these duplicates matter.
    files_to_check: Set[Path] = {p for paths in relevant_dupes.values() for p in paths}

    base_function_locations: Dict[str, List[Path]] = defaultdict(list)
    for file_path in files_to_check:
        if file_path.suffix.lower() not in {'.ts', '.tsx'}:
            continue
        content = get_file_content_at_ref(file_path, root_dir, compare_ref)
        if content is None:
            continue  # File did not exist (or was unreadable) at the base ref.
        for func in extract_functions_from_content(content):
            if func not in SKIP_DUPLICATE_FUNCTIONS:
                base_function_locations[func].append(file_path)

    base_dupes = {name for name, paths in base_function_locations.items() if len(paths) > 1}
    return {name: paths for name, paths in relevant_dupes.items() if name not in base_dupes}
def find_threshold_regressions(
    files: List[Tuple[Path, int]],
    root_dir: Path,
    compare_ref: str,
    threshold: int,
) -> Tuple[List[Tuple[Path, int, Optional[int]]], List[Tuple[Path, int, int]]]:
    """Find files that crossed the threshold or grew while already over it.

    Files currently below ``threshold`` are ignored without consulting git.

    Returns:
        A ``(crossed, grew)`` pair of lists:
        - crossed: ``(path, current_lines, base_lines)`` for files that newly
          reached the threshold; ``base_lines`` is None when the file has no
          line count at ``compare_ref`` (e.g. a new file).
        - grew: ``(path, current_lines, base_lines)`` for files already at or
          over the threshold at ``compare_ref`` that got larger.
    """
    crossed: List[Tuple[Path, int, Optional[int]]] = []
    grew: List[Tuple[Path, int, int]] = []
    for file_path, current_lines in files:
        if current_lines < threshold:
            continue  # Not over threshold now, skip

        base_lines = get_line_count_at_ref(file_path, root_dir, compare_ref)
        if base_lines is None or base_lines < threshold:
            # New file or crossed the threshold
            crossed.append((file_path, current_lines, base_lines))
        elif current_lines > base_lines:
            # Already over threshold and grew larger
            grew.append((file_path, current_lines, base_lines))
    return crossed, grew
2026-02-09 11:34:18 -08:00
2026-02-09 13:41:36 -08:00
def _write_github_summary(
    summary_path: str,
    crossed: List[Tuple[Path, int, Optional[int]]],
    grew: List[Tuple[Path, int, int]],
    new_dupes: Dict[str, List[Path]],
    root_dir: Path,
    threshold: int,
    compare_ref: str,
) -> None:
    """Append a Markdown job summary to the file at ``summary_path``.

    Intended for ``$GITHUB_STEP_SUMMARY``. The report contains one table per
    non-empty violation category (crossed threshold, grew while large, new
    duplicate names) followed by a collapsible "How to fix" section. A
    failure to write is reported on stderr and otherwise ignored, so it can
    never mask the check's own exit status.
    """
    def rel(path: Path) -> str:
        # Forward-slash, repo-relative path for display.
        return path.relative_to(root_dir).as_posix()

    lines: List[str] = [
        "## Code Size Check Failed\n",
        "> ⚠️ **DO NOT trash the code base!** The goal is maintainability.\n",
    ]

    if crossed:
        lines.append(f"### {len(crossed)} file(s) crossed the {threshold}-line threshold\n")
        lines.append("| File | Before | After | Delta |")
        lines.append("|------|-------:|------:|------:|")
        for file_path, current, base in crossed:
            # A file with no base count is shown as "new"; its delta is its full size.
            before = f"{base:,}" if base is not None else "new"
            lines.append(f"| `{rel(file_path)}` | {before} | {current:,} | +{current - (base or 0):,} |")
        lines.append("")

    if grew:
        lines.append(f"### {len(grew)} already-large file(s) grew larger\n")
        lines.append("| File | Before | After | Delta |")
        lines.append("|------|-------:|------:|------:|")
        for file_path, current, base in grew:
            lines.append(f"| `{rel(file_path)}` | {base:,} | {current:,} | +{current - base:,} |")
        lines.append("")

    if new_dupes:
        lines.append(f"### {len(new_dupes)} new duplicate function name(s)\n")
        lines.append("| Function | Files |")
        lines.append("|----------|-------|")
        for func_name in sorted(new_dupes):
            file_list = ", ".join(f"`{rel(p)}`" for p in new_dupes[func_name])
            lines.append(f"| `{func_name}` | {file_list} |")
        lines.append("")

    extension_list = ", ".join(f"`{e}`" for e in sorted(CODE_EXTENSIONS))
    lines.append("<details><summary>How to fix</summary>\n")
    lines.append("- Split large files into smaller, focused modules")
    lines.append("- Extract helpers, types, or constants into separate files")
    lines.append("- See `AGENTS.md` for guidelines (~500–700 LOC target)")
    lines.append(f"- This check compares your PR against `{compare_ref}`")
    lines.append(f"- Only code files are checked: {extension_list}")
    lines.append("- Docs, test names, and config files are **not** affected")
    lines.append("\n</details>")

    try:
        # Append: other steps of the same job may already have written here.
        with open(summary_path, 'a', encoding='utf-8') as f:
            f.write('\n'.join(lines) + '\n')
    except OSError as e:
        print(f"⚠️  Failed to write job summary: {e}", file=sys.stderr)
2026-02-08 04:53:31 -08:00
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser (flags, defaults and help text)."""
    parser = argparse.ArgumentParser(
        description='Analyze code files: list longest/shortest files, find duplicate function names'
    )
    parser.add_argument(
        '-t', '--threshold',
        type=int,
        default=1000,
        help='Warn about files longer than this many lines (default: 1000)'
    )
    parser.add_argument(
        '--min-threshold',
        type=int,
        default=10,
        help='Warn about files shorter than this many lines (default: 10)'
    )
    parser.add_argument(
        '-n', '--top',
        type=int,
        default=20,
        help='Show top N longest files (default: 20)'
    )
    parser.add_argument(
        '-b', '--bottom',
        type=int,
        default=10,
        help='Show bottom N shortest files (default: 10)'
    )
    parser.add_argument(
        '-d', '--directory',
        type=str,
        default='.',
        help='Directory to scan (default: current directory)'
    )
    parser.add_argument(
        '--compare-to',
        type=str,
        default=None,
        help='Git ref to compare against (e.g., origin/main). Only warn about files that grew past threshold.'
    )
    parser.add_argument(
        '--strict',
        action='store_true',
        help='Exit with non-zero status if any violations found (for CI)'
    )
    return parser


def _emit_github_annotations(
    crossed: List[Tuple[Path, int, Optional[int]]],
    grew: List[Tuple[Path, int, int]],
    new_dupes: Dict[str, List[Path]],
    root_dir: Path,
    threshold: int,
) -> None:
    """Print GitHub Actions ``::error`` commands so violations show inline in the PR diff."""
    for file_path, current, base in crossed:
        rel = file_path.relative_to(root_dir).as_posix()
        if base is None:
            print(f"::error file={rel},title=File over {threshold} lines::{rel} is {current:,} lines (new file). Split into smaller modules.")
        else:
            print(f"::error file={rel},title=File crossed {threshold} lines::{rel} grew from {base:,} to {current:,} lines (+{current - base:,}). Split into smaller modules.")
    for file_path, current, base in grew:
        rel = file_path.relative_to(root_dir).as_posix()
        print(f"::error file={rel},title=Large file grew larger::{rel} is already {base:,} lines and grew to {current:,} (+{current - base:,}). Consider refactoring.")
    for func_name in sorted(new_dupes):
        for p in new_dupes[func_name]:
            rel = p.relative_to(root_dir).as_posix()
            print(f"::error file={rel},title=Duplicate function '{func_name}'::Function '{func_name}' appears in multiple files. Centralize or rename.")


def _print_failure_guidance(
    crossed: List[Tuple[Path, int, Optional[int]]],
    grew: List[Tuple[Path, int, int]],
    new_dupes: Dict[str, List[Path]],
    threshold: int,
    compare_ref: str,
) -> None:
    """Print an actionable summary so contributors know what to do."""
    print("─" * 60)
    print("❌ Code size check failed\n")
    print("   ⚠️  DO NOT just trash the code base!")
    print("   The goal is maintainability.\n")
    if crossed:
        print(f"   {len(crossed)} file(s) grew past the {threshold}-line limit.")
    if grew:
        print(f"   {len(grew)} file(s) already over {threshold} lines got larger.")
    if new_dupes:
        # Without this line a duplicates-only failure gave no reason at all.
        print(f"   {len(new_dupes)} new duplicate function name(s) were introduced.")
    print()
    print("   How to fix:")
    print("   • Split large files into smaller, focused modules")
    print("   • Extract helpers, types, or constants into separate files")
    print("   • See AGENTS.md for guidelines (~500-700 LOC target)")
    print()
    print(f"   This check compares your PR against {compare_ref}.")
    print(f"   Only code files are checked ({', '.join(sorted(CODE_EXTENSIONS))}).")
    print("   Docs, test names, and config files are not affected.")
    print("─" * 60)


def _run_compare_mode(args: argparse.Namespace, root_dir: Path) -> None:
    """CI delta mode: report only regressions relative to ``args.compare_to``.

    Exits with status 2 when the ref is invalid, and with status 1 when
    ``--strict`` is set and any regression was found. Otherwise returns.
    """
    print(f"\n📂 Scanning: {root_dir}")
    print(f"🔍 Comparing to: {args.compare_to}\n")

    if not validate_git_ref(root_dir, args.compare_to):
        print(f"❌ Invalid git ref: {args.compare_to}", file=sys.stderr)
        print("   Make sure the ref exists (e.g. run 'git fetch origin <branch>')", file=sys.stderr)
        sys.exit(2)

    files = find_code_files(root_dir)
    violations = False

    # Check file length regressions
    crossed, grew = find_threshold_regressions(files, root_dir, args.compare_to, args.threshold)

    if crossed:
        print(f"⚠️  {len(crossed)} file(s) crossed {args.threshold} line threshold:\n")
        for file_path, current, base in crossed:
            relative_path = file_path.relative_to(root_dir)
            if base is None:
                print(f"   {relative_path}: {current:,} lines (new file)")
            else:
                print(f"   {relative_path}: {base:,} → {current:,} lines (+{current - base:,})")
        print()
        violations = True
    else:
        print(f"✅ No files crossed {args.threshold} line threshold")

    if grew:
        print(f"⚠️  {len(grew)} already-large file(s) grew larger:\n")
        for file_path, current, base in grew:
            relative_path = file_path.relative_to(root_dir)
            print(f"   {relative_path}: {base:,} → {current:,} lines (+{current - base:,})")
        print()
        violations = True
    else:
        print("✅ No already-large files grew")

    # Check new duplicate function names
    new_dupes = find_duplicate_regressions(files, root_dir, args.compare_to)
    if new_dupes:
        print(f"⚠️  {len(new_dupes)} new duplicate function name(s):\n")
        for func_name in sorted(new_dupes):
            print(f"   {func_name}:")
            for path in new_dupes[func_name]:
                print(f"      {path.relative_to(root_dir)}")
        print()
        violations = True
    else:
        print("✅ No new duplicate function names")

    print()

    if not args.strict:
        return
    if not violations:
        print("─" * 60)
        print("✅ Code size check passed — no files exceed thresholds.")
        print("─" * 60)
        return

    # Emit GitHub Actions file annotations so violations appear inline in the PR diff
    if os.environ.get('GITHUB_ACTIONS') == 'true':
        _emit_github_annotations(crossed, grew, new_dupes, root_dir, args.threshold)
        # Write GitHub Actions job summary (visible in the Actions check details)
        summary_path = os.environ.get('GITHUB_STEP_SUMMARY')
        if summary_path:
            _write_github_summary(summary_path, crossed, grew, new_dupes, root_dir, args.threshold, args.compare_to)

    _print_failure_guidance(crossed, grew, new_dupes, args.threshold, args.compare_to)
    sys.exit(1)


def _run_report_mode(args: argparse.Namespace, root_dir: Path) -> None:
    """Full report: longest/shortest files, totals, per-package stats, warnings.

    Exits with status 1 when ``--strict`` is set and any file is at or over
    ``args.threshold``.
    """
    print(f"\n📂 Scanning: {root_dir}\n")

    # Find and sort files by line count
    files = find_code_files(root_dir)
    files_desc = sorted(files, key=lambda x: x[1], reverse=True)
    files_asc = sorted(files, key=lambda x: x[1])

    # Show top N longest files
    top_files = files_desc[:args.top]
    print(f"📊 Top {min(args.top, len(top_files))} longest code files:\n")
    print(f"{'Lines':>8}  {'File'}")
    print("-" * 60)
    for file_path, line_count in top_files:
        marker = " ⚠️" if line_count >= args.threshold else ""
        print(f"{line_count:>8}  {file_path.relative_to(root_dir)}{marker}")

    # Over-threshold files are collected from ALL files, not just the rows
    # displayed above: the warning count and the --strict exit status must
    # not depend on how many rows --top chose to show.
    long_warnings = [
        (file_path.relative_to(root_dir), line_count)
        for file_path, line_count in files_desc
        if line_count >= args.threshold
    ]

    # Show bottom N shortest files
    bottom_files = files_asc[:args.bottom]
    print(f"\n📉 Bottom {min(args.bottom, len(bottom_files))} shortest code files:\n")
    print(f"{'Lines':>8}  {'File'}")
    print("-" * 60)
    short_warnings = []
    for file_path, line_count in bottom_files:
        relative_path = file_path.relative_to(root_dir)
        filename = file_path.name
        # Skip known barrel exports and stubs
        is_expected_short = (
            filename in SKIP_SHORT_PATTERNS
            or filename.endswith(SKIP_SHORT_SUFFIXES)
        )
        if line_count <= args.min_threshold and not is_expected_short:
            marker = " ⚠️"
            short_warnings.append((relative_path, line_count))
        else:
            marker = ""
        print(f"{line_count:>8}  {relative_path}{marker}")

    # Summary
    total_files = len(files)
    total_lines = sum(count for _, count in files)
    print("-" * 60)
    print("\n📈 Summary:")
    print(f"  Total code files: {total_files:,}")
    print(f"  Total lines: {total_lines:,}")
    print(f"  Average lines/file: {total_lines // total_files if total_files else 0:,}")

    # Per-package breakdown
    package_stats: Dict[str, Dict[str, int]] = {}
    for file_path, line_count in files:
        stats = package_stats.setdefault(get_package(file_path, root_dir), {'files': 0, 'lines': 0})
        stats['files'] += 1
        stats['lines'] += line_count

    print("\n📦 Per-package breakdown:\n")
    print(f"{'Package':<15} {'Files':>8} {'Lines':>10} {'Avg':>8}")
    print("-" * 45)
    for pkg in sorted(package_stats, key=lambda p: package_stats[p]['lines'], reverse=True):
        stats = package_stats[pkg]
        avg = stats['lines'] // stats['files'] if stats['files'] else 0
        print(f"{pkg:<15} {stats['files']:>8,} {stats['lines']:>10,} {avg:>8,}")

    # Long file warnings
    if long_warnings:
        print(f"\n⚠️  Warning: {len(long_warnings)} file(s) exceed {args.threshold} lines (consider refactoring):")
        for path, count in long_warnings:
            print(f"  - {path} ({count:,} lines)")
    else:
        print(f"\n✅ No files exceed {args.threshold} lines")

    # Short file warnings
    if short_warnings:
        print(f"\n⚠️  Warning: {len(short_warnings)} file(s) are {args.min_threshold} lines or less (check if needed):")
        for path, count in short_warnings:
            print(f"  - {path} ({count} lines)")
    else:
        print(f"\n✅ No files are {args.min_threshold} lines or less")

    # Duplicate function names
    duplicates = find_duplicate_functions(files, root_dir)
    if duplicates:
        print(f"\n⚠️  Warning: {len(duplicates)} function name(s) appear in multiple files (consider renaming):")
        for func_name in sorted(duplicates):
            print(f"  - {func_name}:")
            for path in duplicates[func_name]:
                print(f"      {path.relative_to(root_dir)}")
    else:
        print("\n✅ No duplicate function names")

    print()

    # Exit with error if --strict and there are violations
    if args.strict and long_warnings:
        sys.exit(1)


def main() -> None:
    """Command-line entry point.

    With ``--compare-to`` the script runs in CI delta mode and reports only
    regressions against the given git ref; otherwise it prints the full
    report for the scanned directory. With ``--strict`` the process exits
    non-zero when violations are found.
    """
    args = _build_arg_parser().parse_args()
    root_dir = Path(args.directory).resolve()

    # CI delta mode: only show regressions
    if args.compare_to:
        _run_compare_mode(args, root_dir)
        return

    _run_report_mode(args, root_dir)
2026-02-08 04:53:31 -08:00
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()