@@ -543,3 +543,280 @@ def _create_remediation(self) -> Remediation:
543543 ),
544544 ],
545545 )
546+
547+
548+ class ConciseDocumentationAssessor (BaseAssessor ):
549+ """Assesses documentation conciseness and structure.
550+
551+ Tier 2 Critical (3% weight) - Concise documentation improves LLM
552+ performance by reducing context window pollution and improving
553+ information retrieval speed.
554+ """
555+
@property
def attribute_id(self) -> str:
    """Return the identifier string of the attribute this assessor scores."""
    identifier = "concise_documentation"
    return identifier
559+
@property
def tier(self) -> int:
    """Return the tier number assigned to this attribute."""
    critical_tier = 2  # Critical
    return critical_tier
563+
@property
def attribute(self) -> Attribute:
    """Build the Attribute record describing this assessor.

    The ``id`` and ``tier`` fields are read from the ``attribute_id`` and
    ``tier`` properties of this object; every other field is a fixed literal.
    """
    fields = {
        "id": self.attribute_id,
        "name": "Concise Documentation",
        "category": "Documentation",
        "tier": self.tier,
        "description": "Documentation maximizes information density while minimizing token consumption",
        "criteria": "README <500 lines with clear structure, bullet points over prose",
        "default_weight": 0.03,
    }
    return Attribute(**fields)
575+
def assess(self, repository: Repository) -> Finding:
    """Check README for conciseness and structure.

    Scoring (weighted sum on a 0-100 scale; status is "pass" at 75 or above):
    - README length (30%): <300 excellent, 300-500 good, 500-750 acceptable, >=750 poor
    - Markdown structure (40%): Heading density (target 3-5 per 100 lines)
    - Concise formatting (30%): Bullet points, code blocks, no walls of text

    Headings and bullets are counted outside fenced code blocks, so shell
    comments (``# ...``) or list-like lines inside a fence are not mistaken
    for document structure.

    Args:
        repository: Object whose ``path`` attribute is joined with
            "README.md" to locate the file to analyze.

    Returns:
        ``Finding.not_applicable`` when README.md does not exist,
        ``Finding.error`` when it cannot be read or decoded as UTF-8,
        otherwise a Finding carrying the score, status, evidence and, on
        failure only, remediation guidance.
    """
    readme_path = repository.path / "README.md"

    if not readme_path.exists():
        return Finding.not_applicable(
            self.attribute, reason="No README.md found in repository"
        )

    try:
        content = readme_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        return Finding.error(
            self.attribute, reason=f"Could not read README.md: {e}"
        )

    line_count = len(content.splitlines())

    # Drop fenced code blocks before looking for structure: a "# comment" or
    # "- item" line inside a fence is code, not a heading or a bullet.
    # An unclosed fence does not match and is left in place.
    prose = re.sub(
        r"^[ \t]*```.*?^[ \t]*```[^\n]*$",
        "",
        content,
        flags=re.MULTILINE | re.DOTALL,
    )

    # Check 1: README length (30%)
    length_score = self._calculate_length_score(line_count)

    # Check 2: Markdown structure (40%)
    heading_count = len(re.findall(r"^#{1,6} .+$", prose, re.MULTILINE))
    structure_score = self._calculate_structure_score(heading_count, line_count)

    # Check 3: Concise formatting (30%)
    bullets = len(re.findall(r"^[\-\*] .+$", prose, re.MULTILINE))
    code_blocks = len(re.findall(r"```", content)) // 2  # Pairs of fences
    long_paragraphs = self._count_long_paragraphs(content)
    formatting_score = self._calculate_formatting_score(
        bullets, code_blocks, long_paragraphs
    )

    score = (
        (length_score * 0.3) + (structure_score * 0.4) + (formatting_score * 0.3)
    )
    status = "pass" if score >= 75 else "fail"

    evidence = []

    # Length evidence: same bands as the length score.
    if line_count < 300:
        length_label = "excellent"
    elif line_count < 500:
        length_label = "good"
    elif line_count < 750:
        length_label = "acceptable"
    else:
        length_label = "excessive"
    evidence.append(f"README length: {line_count} lines ({length_label})")

    # Structure evidence. Multiply before dividing so exact ratios such as
    # 7/100 are not perturbed by float rounding; max() guards an empty file.
    heading_density = heading_count * 100 / max(line_count, 1)
    if 3 <= heading_density <= 5:
        evidence.append(
            f"Heading density: {heading_density:.1f} per 100 lines (good structure)"
        )
    else:
        evidence.append(
            f"Heading density: {heading_density:.1f} per 100 lines (target: 3-5)"
        )

    # Formatting evidence
    if bullets > 10 and long_paragraphs == 0:
        evidence.append(
            f"{bullets} bullet points, {code_blocks} code blocks (concise formatting)"
        )
    elif long_paragraphs > 0:
        evidence.append(
            f"{long_paragraphs} paragraphs exceed 10 lines (walls of text)"
        )
    else:
        evidence.append(f"Only {bullets} bullet points (prefer bullets over prose)")

    return Finding(
        attribute=self.attribute,
        status=status,
        score=score,
        measured_value=f"{line_count} lines, {heading_count} headings, {bullets} bullets",
        threshold="<500 lines, structured format",
        evidence=evidence,
        remediation=self._create_remediation() if status == "fail" else None,
        error_message=None,
    )
671+
672+ def _calculate_length_score (self , line_count : int ) -> float :
673+ """Calculate score based on README length.
674+
675+ <300 lines: 100%
676+ 300-500: 80%
677+ 500-750: 60%
678+ >750: 0%
679+ """
680+ if line_count < 300 :
681+ return 100.0
682+ elif line_count < 500 :
683+ return 80.0
684+ elif line_count < 750 :
685+ return 60.0
686+ else :
687+ return 0.0
688+
689+ def _calculate_structure_score (self , heading_count : int , line_count : int ) -> float :
690+ """Calculate score based on heading density.
691+
692+ Target: 3-5 headings per 100 lines
693+ """
694+ if line_count == 0 :
695+ return 0.0
696+
697+ density = (heading_count / line_count ) * 100
698+
699+ # Optimal range: 3-5 headings per 100 lines
700+ if 3 <= density <= 5 :
701+ return 100.0
702+ elif 2 <= density < 3 or 5 < density <= 7 :
703+ return 80.0
704+ elif 1 <= density < 2 or 7 < density <= 10 :
705+ return 60.0
706+ else :
707+ return 40.0
708+
709+ def _calculate_formatting_score (
710+ self , bullets : int , code_blocks : int , long_paragraphs : int
711+ ) -> float :
712+ """Calculate score based on formatting style.
713+
714+ Rewards: bullet points, code blocks
715+ Penalizes: long paragraphs (walls of text)
716+ """
717+ score = 50.0 # Base score
718+
719+ # Reward bullet points
720+ if bullets > 20 :
721+ score += 30
722+ elif bullets > 10 :
723+ score += 20
724+ elif bullets > 5 :
725+ score += 10
726+
727+ # Reward code blocks
728+ if code_blocks > 5 :
729+ score += 20
730+ elif code_blocks > 2 :
731+ score += 10
732+
733+ # Penalize long paragraphs
734+ if long_paragraphs == 0 :
735+ score += 0 # No penalty
736+ elif long_paragraphs <= 3 :
737+ score -= 20
738+ else :
739+ score -= 40
740+
741+ return max (0 , min (100 , score ))
742+
743+ def _count_long_paragraphs (self , content : str ) -> int :
744+ """Count paragraphs exceeding 10 lines (walls of text)."""
745+ # Split by double newlines to find paragraphs
746+ paragraphs = re .split (r"\n\n+" , content )
747+
748+ long_count = 0
749+ for para in paragraphs :
750+ # Skip code blocks and lists
751+ if para .strip ().startswith ("```" ) or para .strip ().startswith ("-" ):
752+ continue
753+
754+ lines = para .count ("\n " ) + 1
755+ if lines > 10 :
756+ long_count += 1
757+
758+ return long_count
759+
def _create_remediation(self) -> Remediation:
    """Assemble the fixed remediation guidance returned for verbose READMEs.

    All content is static: a summary, ordered steps, shell commands for
    measuring the README, a good and a bad example, and two citations.
    """
    steps = [
        "Break long README into multiple documents (docs/ directory)",
        "Add clear Markdown headings (##, ###) for structure",
        "Convert prose paragraphs to bullet points where possible",
        "Add table of contents for documents >100 lines",
        "Use code blocks instead of describing commands in prose",
        "Move detailed content to wiki or docs/, keep README focused",
    ]

    commands = [
        "# Check README length",
        "wc -l README.md",
        "",
        "# Count headings",
        "grep -c '^#' README.md",
    ]

    good_example = """# Good: Concise with structure

## Quick Start
```bash
pip install -e .
agentready assess .
```

## Features
- Fast repository scanning
- HTML and Markdown reports
- 25 agent-ready attributes

## Documentation
See [docs/](docs/) for detailed guides.
"""

    bad_example = """# Bad: Verbose prose

This project is a tool that helps you assess your repository
against best practices for AI-assisted development. It works by
scanning your codebase and checking for various attributes that
make repositories more effective when working with AI coding
assistants like Claude Code...

[Many more paragraphs of prose...]
"""

    long_context_citation = Citation(
        source="ArXiv",
        title="LongCodeBench: Evaluating Coding LLMs at 1M Context Windows",
        url="https://arxiv.org/abs/2501.00343",
        relevance="Research showing performance degradation with long contexts",
    )
    markdown_citation = Citation(
        source="Markdown Guide",
        title="Basic Syntax",
        url="https://www.markdownguide.org/basic-syntax/",
        relevance="Best practices for Markdown formatting",
    )

    return Remediation(
        summary="Make documentation more concise and structured",
        steps=steps,
        tools=[],
        commands=commands,
        examples=[good_example, bad_example],
        citations=[long_context_citation, markdown_citation],
    )
0 commit comments