Skip to content

Commit c356cd5

Browse files
authored
feat: Implement ConciseDocumentationAssessor (fixes #76) (#93)
- Analyzes README for conciseness and structure
- Scoring: 30% length, 40% heading structure, 30% formatting
- Detects: excessive length, poor heading density, walls of text
- Rewards: bullet points, code blocks, concise paragraphs
- Test result: AgentReady scored 70/100 (README has excellent length but too many headings)
- Removed duplicate stub assessor for concise_documentation
1 parent a10da1b commit c356cd5

File tree

3 files changed

+280
-8
lines changed

3 files changed

+280
-8
lines changed

src/agentready/assessors/documentation.py

Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,3 +543,280 @@ def _create_remediation(self) -> Remediation:
543543
),
544544
],
545545
)
546+
547+
548+
class ConciseDocumentationAssessor(BaseAssessor):
    """Assesses documentation conciseness and structure.

    Tier 2 Critical (3% weight) - Concise documentation improves LLM
    performance by reducing context window pollution and improving
    information retrieval speed.

    The README is scored on three weighted checks: total length (30%),
    heading density (40%) and formatting style (30%). Fenced code blocks
    are excluded when counting headings, bullets and prose paragraphs, so
    shell comments or YAML inside examples do not distort the result.
    """

    # Weights of the three checks; they sum to 1.0.
    _LENGTH_WEIGHT = 0.3
    _STRUCTURE_WEIGHT = 0.4
    _FORMATTING_WEIGHT = 0.3

    # Minimum combined score (0-100) required for a "pass".
    _PASS_SCORE = 75

    # (exclusive upper bound in lines, score, evidence label), ascending.
    # Shared by scoring and evidence so the two can never disagree.
    _LENGTH_BANDS = (
        (300, 100.0, "excellent"),
        (500, 80.0, "good"),
        (750, 60.0, "acceptable"),
    )
    _LENGTH_FALLBACK = (0.0, "excessive")

    # A prose paragraph with more lines than this is a "wall of text".
    _LONG_PARAGRAPH_LINES = 10

    _HEADING_RE = re.compile(r"^#{1,6} .+$", re.MULTILINE)
    # Bullets may be nested (indented) and may use "-", "*" or "+".
    _BULLET_RE = re.compile(r"^[ \t]*[-*+] .+$", re.MULTILINE)
    # Opening/closing marker of a fenced code block (``` or ~~~).
    _FENCE_RE = re.compile(r"^[ \t]*(`{3,}|~{3,})")
    # First line of a list (bulleted or numbered) or of a table row.
    _NON_PROSE_START_RE = re.compile(r"(?:[-*+]|\d+[.)])\s|\|")

    @property
    def attribute_id(self) -> str:
        return "concise_documentation"

    @property
    def tier(self) -> int:
        return 2  # Critical

    @property
    def attribute(self) -> Attribute:
        return Attribute(
            id=self.attribute_id,
            name="Concise Documentation",
            category="Documentation",
            tier=self.tier,
            description="Documentation maximizes information density while minimizing token consumption",
            criteria="README <500 lines with clear structure, bullet points over prose",
            default_weight=0.03,
        )

    def assess(self, repository: Repository) -> Finding:
        """Check README for conciseness and structure.

        Scoring:
        - README length (30%): <300 excellent, 300-499 good,
          500-749 acceptable, >=750 poor
        - Markdown structure (40%): Heading density (target 3-5 per 100 lines)
        - Concise formatting (30%): Bullet points, code blocks, no walls of text

        Returns a not-applicable finding when ``README.md`` does not exist,
        an error finding when it cannot be read or decoded, and otherwise a
        pass/fail finding (pass at a combined score of 75 or more).
        """
        readme_path = repository.path / "README.md"

        if not readme_path.exists():
            return Finding.not_applicable(
                self.attribute, reason="No README.md found in repository"
            )

        try:
            # utf-8-sig also accepts plain UTF-8 but drops a leading BOM,
            # which would otherwise hide a heading on the first line.
            content = readme_path.read_text(encoding="utf-8-sig")
        except (OSError, UnicodeDecodeError) as e:
            return Finding.error(
                self.attribute, reason=f"Could not read README.md: {e}"
            )

        # Length is measured on the whole file, code examples included.
        line_count = len(content.splitlines())

        # Headings and bullets are counted outside fenced code only, so
        # "# comment" or "- item" lines inside examples are not mistaken
        # for document structure.
        prose_lines, code_blocks = self._split_fenced_code(content)
        prose = "\n".join(prose_lines)
        heading_count = len(self._HEADING_RE.findall(prose))
        bullets = len(self._BULLET_RE.findall(prose))
        long_paragraphs = self._count_long_paragraphs(content)

        # Check 1: README length (30%)
        length_score = self._calculate_length_score(line_count)
        # Check 2: Markdown structure (40%)
        structure_score = self._calculate_structure_score(heading_count, line_count)
        # Check 3: Concise formatting (30%)
        formatting_score = self._calculate_formatting_score(
            bullets, code_blocks, long_paragraphs
        )

        score = (
            (length_score * self._LENGTH_WEIGHT)
            + (structure_score * self._STRUCTURE_WEIGHT)
            + (formatting_score * self._FORMATTING_WEIGHT)
        )
        status = "pass" if score >= self._PASS_SCORE else "fail"

        return Finding(
            attribute=self.attribute,
            status=status,
            score=score,
            measured_value=f"{line_count} lines, {heading_count} headings, {bullets} bullets",
            threshold="<500 lines, structured format",
            evidence=self._build_evidence(
                line_count, heading_count, bullets, code_blocks, long_paragraphs
            ),
            remediation=self._create_remediation() if status == "fail" else None,
            error_message=None,
        )

    def _build_evidence(
        self,
        line_count: int,
        heading_count: int,
        bullets: int,
        code_blocks: int,
        long_paragraphs: int,
    ) -> "list[str]":
        """Build the three evidence lines (length, structure, formatting)."""
        _, length_label = self._length_band(line_count)
        evidence = [f"README length: {line_count} lines ({length_label})"]

        # max(..., 1) keeps an empty README from dividing by zero.
        heading_density = (heading_count / max(line_count, 1)) * 100
        density_note = "good structure" if 3 <= heading_density <= 5 else "target: 3-5"
        evidence.append(
            f"Heading density: {heading_density:.1f} per 100 lines ({density_note})"
        )

        if bullets > 10 and long_paragraphs == 0:
            evidence.append(
                f"{bullets} bullet points, {code_blocks} code blocks (concise formatting)"
            )
        elif long_paragraphs > 0:
            evidence.append(
                f"{long_paragraphs} paragraphs exceed "
                f"{self._LONG_PARAGRAPH_LINES} lines (walls of text)"
            )
        else:
            evidence.append(f"Only {bullets} bullet points (prefer bullets over prose)")

        return evidence

    def _length_band(self, line_count: int) -> "tuple[float, str]":
        """Return ``(score, label)`` for the length band containing line_count."""
        for upper_bound, band_score, label in self._LENGTH_BANDS:
            if line_count < upper_bound:
                return band_score, label
        return self._LENGTH_FALLBACK

    def _calculate_length_score(self, line_count: int) -> float:
        """Calculate score based on README length.

        <300 lines: 100%
        300-499: 80%
        500-749: 60%
        >=750: 0%
        """
        band_score, _ = self._length_band(line_count)
        return band_score

    def _calculate_structure_score(self, heading_count: int, line_count: int) -> float:
        """Calculate score based on heading density.

        Target: 3-5 headings per 100 lines (100%). Densities of 2-3 or
        5-7 score 80%, 1-2 or 7-10 score 60%, anything else 40%. An empty
        README (zero lines) scores 0%.
        """
        if line_count == 0:
            return 0.0

        density = (heading_count / line_count) * 100

        if 3 <= density <= 5:
            return 100.0
        if 2 <= density < 3 or 5 < density <= 7:
            return 80.0
        if 1 <= density < 2 or 7 < density <= 10:
            return 60.0
        return 40.0

    def _calculate_formatting_score(
        self, bullets: int, code_blocks: int, long_paragraphs: int
    ) -> float:
        """Calculate score based on formatting style.

        Starts from a base of 50 and then:
        - Rewards bullet points: >20 adds 30, >10 adds 20, >5 adds 10
        - Rewards code blocks: >5 adds 20, >2 adds 10
        - Penalizes long paragraphs (walls of text): 1-3 subtracts 20,
          more than 3 subtracts 40

        The result is clamped to 0-100 and is always a float.
        """
        score = 50.0  # Base score

        if bullets > 20:
            score += 30.0
        elif bullets > 10:
            score += 20.0
        elif bullets > 5:
            score += 10.0

        if code_blocks > 5:
            score += 20.0
        elif code_blocks > 2:
            score += 10.0

        if long_paragraphs > 3:
            score -= 40.0
        elif long_paragraphs > 0:
            score -= 20.0

        # Float bounds keep the return type a float even when clamped.
        return max(0.0, min(100.0, score))

    def _split_fenced_code(self, content: str) -> "tuple[list[str], int]":
        """Separate fenced code blocks from the rest of the document.

        Returns ``(prose_lines, block_count)``. ``prose_lines`` holds every
        line outside fenced code; each fenced block (fence lines included)
        is replaced by one empty line so it still separates paragraphs.
        ``block_count`` is the number of fenced blocks opened; a fence
        left unclosed runs to the end of the document and counts as one.
        """
        prose_lines = []
        block_count = 0
        open_fence = None  # marker text of the block currently open

        for line in content.splitlines():
            match = self._FENCE_RE.match(line)
            marker = match.group(1) if match else None
            rest = line[match.end():] if match else ""

            if open_fence is None:
                # A backtick fence may not have backticks in its info
                # string; that shape is inline code, not a fence.
                if marker and not (marker[0] == "`" and "`" in rest):
                    open_fence = marker
                    block_count += 1
                    prose_lines.append("")
                else:
                    prose_lines.append(line)
            elif (
                marker
                and marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not rest.strip()
            ):
                # Closing fence: same character, at least as long, and
                # nothing but whitespace after it.
                open_fence = None

        return prose_lines, block_count

    def _count_long_paragraphs(self, content: str) -> int:
        """Count prose paragraphs exceeding 10 lines (walls of text).

        Paragraphs are runs of consecutive non-blank lines outside fenced
        code; a heading line also ends the current paragraph. Runs whose
        first line starts a list (``-``, ``*``, ``+`` or a number) or a
        table row (``|``) are not prose and are ignored.
        """
        prose_lines, _ = self._split_fenced_code(content)

        long_count = 0
        paragraph = []
        # The trailing empty string flushes the final paragraph.
        for line in prose_lines + [""]:
            stripped = line.strip()
            if stripped and not self._HEADING_RE.match(line):
                paragraph.append(stripped)
                continue

            if len(paragraph) > self._LONG_PARAGRAPH_LINES and not (
                self._NON_PROSE_START_RE.match(paragraph[0])
            ):
                long_count += 1
            paragraph = []

        return long_count

    def _create_remediation(self) -> Remediation:
        """Create remediation guidance for verbose documentation."""
        return Remediation(
            summary="Make documentation more concise and structured",
            steps=[
                "Break long README into multiple documents (docs/ directory)",
                "Add clear Markdown headings (##, ###) for structure",
                "Convert prose paragraphs to bullet points where possible",
                "Add table of contents for documents >100 lines",
                "Use code blocks instead of describing commands in prose",
                "Move detailed content to wiki or docs/, keep README focused",
            ],
            tools=[],
            commands=[
                "# Check README length",
                "wc -l README.md",
                "",
                "# Count headings",
                "grep -c '^#' README.md",
            ],
            examples=[
                """# Good: Concise with structure

## Quick Start
```bash
pip install -e .
agentready assess .
```

## Features
- Fast repository scanning
- HTML and Markdown reports
- 25 agent-ready attributes

## Documentation
See [docs/](docs/) for detailed guides.
""",
                """# Bad: Verbose prose

This project is a tool that helps you assess your repository
against best practices for AI-assisted development. It works by
scanning your codebase and checking for various attributes that
make repositories more effective when working with AI coding
assistants like Claude Code...

[Many more paragraphs of prose...]
""",
            ],
            citations=[
                Citation(
                    source="ArXiv",
                    title="LongCodeBench: Evaluating Coding LLMs at 1M Context Windows",
                    url="https://arxiv.org/abs/2501.00343",
                    relevance="Research showing performance degradation with long contexts",
                ),
                Citation(
                    source="Markdown Guide",
                    title="Basic Syntax",
                    url="https://www.markdownguide.org/basic-syntax/",
                    relevance="Best practices for Markdown formatting",
                ),
            ],
        )

src/agentready/assessors/stub_assessors.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -276,13 +276,6 @@ def create_stub_assessors():
276276
2,
277277
0.03,
278278
),
279-
StubAssessor(
280-
"concise_documentation",
281-
"Concise Structured Documentation",
282-
"Context Window Optimization",
283-
2,
284-
0.03,
285-
),
286279
StubAssessor(
287280
"inline_documentation",
288281
"Inline Documentation",

src/agentready/cli/main.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from ..assessors.documentation import (
2222
ArchitectureDecisionsAssessor,
2323
CLAUDEmdAssessor,
24+
ConciseDocumentationAssessor,
2425
READMEAssessor,
2526
)
2627
from ..assessors.structure import (
@@ -76,13 +77,14 @@ def create_all_assessors():
7677
TypeAnnotationsAssessor(),
7778
StandardLayoutAssessor(),
7879
LockFilesAssessor(),
79-
# Tier 2 Critical (10 assessors - 4 implemented, 6 stubs)
80+
# Tier 2 Critical (10 assessors - 5 implemented, 5 stubs)
8081
TestCoverageAssessor(),
8182
PreCommitHooksAssessor(),
8283
ConventionalCommitsAssessor(),
8384
GitignoreAssessor(),
8485
OneCommandSetupAssessor(),
8586
SeparationOfConcernsAssessor(),
87+
ConciseDocumentationAssessor(),
8688
CyclomaticComplexityAssessor(), # Actually Tier 3, but including here
8789
# Tier 3 Important (4 implemented)
8890
ArchitectureDecisionsAssessor(),

0 commit comments

Comments
 (0)