package cmd

import (
	"archive/zip"
	"context"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"

	"github.com/spf13/cobra"

	"github.com/supermodeltools/cli/internal/api"
	"github.com/supermodeltools/cli/internal/cache"
	"github.com/supermodeltools/cli/internal/config"
	"github.com/supermodeltools/cli/internal/restore"
)

// init registers the "restore" subcommand on rootCmd. Flag targets are
// closure-captured locals so the command carries no package-level state.
func init() {
	var localMode bool
	var maxTokens int
	var dir string

	c := &cobra.Command{
		Use:   "restore",
		Short: "Generate a project context summary to restore Claude's understanding",
		Long: `Restore builds a high-level project summary (a "context bomb") and writes it
to stdout. Use it after Claude Code compacts its context window to re-establish
understanding of your codebase structure, domains, and key files.

With an API key configured (run 'supermodel login'), restore calls the
Supermodel API for an AI-powered analysis including semantic domains, external
dependencies, and critical file ranking.

Without an API key (or with --local), restore performs a local scan of the
repository file tree and produces a simpler structural summary.

Examples:

  # pipe into Claude Code (typical use)
  supermodel restore

  # use local analysis only, no API call
  supermodel restore --local

  # increase the token budget for larger projects
  supermodel restore --max-tokens 4000`,
		RunE: func(cmd *cobra.Command, args []string) error {
			return runRestore(cmd, dir, localMode, maxTokens)
		},
		// Errors from runRestore are runtime failures, not misuse: suppress
		// cobra's usage dump so only the error itself is printed.
		SilenceUsage: true,
	}

	c.Flags().BoolVar(&localMode, "local", false, "use local file scan instead of Supermodel API")
	c.Flags().IntVar(&maxTokens, "max-tokens", restore.DefaultMaxTokens, "maximum token budget for the output")
	c.Flags().StringVar(&dir, "dir", "", "project directory (default: current working directory)")

	rootCmd.AddCommand(c)
}

// runRestore implements the restore command: it resolves the project root,
// builds a ProjectGraph (via the API when possible, otherwise locally), and
// renders the context bomb to stdout. Status/warning text goes to stderr so
// stdout stays clean for piping into Claude Code.
func runRestore(cmd *cobra.Command, dir string, localMode bool, maxTokens int) error {
	// Resolve the project directory.
	if dir == "" {
		var err error
		dir, err = os.Getwd()
		if err != nil {
			return fmt.Errorf("get working directory: %w", err)
		}
	}
	rootDir := findGitRoot(dir)

	projectName := filepath.Base(rootDir)

	opts := restore.RenderOptions{
		MaxTokens: maxTokens,
		ClaudeMD:  restore.ReadClaudeMD(rootDir),
	}

	var graph *restore.ProjectGraph

	// Config load failure is deliberately ignored: a missing/broken config
	// simply means no API key, which routes us into local mode below.
	cfg, _ := config.Load()
	hasAPIKey := cfg != nil && cfg.APIKey != ""

	if !localMode && hasAPIKey {
		var err error
		graph, err = restoreViaAPI(cmd, cfg, rootDir, projectName)
		if err != nil {
			// API failure is non-fatal: warn and fall through to local mode
			// (graph stays nil).
			fmt.Fprintf(cmd.ErrOrStderr(), "warning: API analysis failed (%v), falling back to local mode\n", err)
		}
	}

	if graph == nil {
		opts.LocalMode = true
		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancel()
		var err error
		graph, err = restore.BuildProjectGraph(ctx, rootDir, projectName)
		if err != nil {
			return fmt.Errorf("local analysis failed: %w", err)
		}
	}

	output, _, err := restore.Render(graph, projectName, opts)
	if err != nil {
		return fmt.Errorf("render: %w", err)
	}
	_, err = fmt.Fprint(cmd.OutOrStdout(), output)
	return err
}
+func restoreViaAPI(cmd *cobra.Command, cfg *config.Config, rootDir, projectName string) (*restore.ProjectGraph, error) { + zipPath, err := restoreCreateZip(rootDir) + if err != nil { + return nil, fmt.Errorf("create archive: %w", err) + } + defer os.Remove(zipPath) + + hash, err := cache.HashFile(zipPath) + if err != nil { + return nil, fmt.Errorf("hash archive: %w", err) + } + + client := api.New(cfg) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + + fmt.Fprintln(cmd.ErrOrStderr(), "Analyzing repository…") + ir, err := client.AnalyzeDomains(ctx, zipPath, "restore-"+hash[:16]) + if err != nil { + return nil, err + } + + graph := restore.FromSupermodelIR(ir, projectName) + return graph, nil +} + +// restoreCreateZip creates a temporary ZIP of the repository at dir. +// It tries git archive first (respects .gitignore), then falls back to a +// simple directory walk. Each vertical slice owns its own zip helper so that +// slice-specific behavior (file-size limits, skip lists) can diverge without +// coordination; see internal/analyze/zip.go for the canonical reference. +func restoreCreateZip(dir string) (string, error) { + f, err := os.CreateTemp("", "supermodel-restore-*.zip") + if err != nil { + return "", err + } + dest := f.Name() + f.Close() + + cmd := exec.Command("git", "-C", dir, "archive", "--format=zip", "-o", dest, "HEAD") + cmd.Stderr = os.Stderr + if err := cmd.Run(); err == nil { + return dest, nil + } + + // Fallback: walk the directory. + if err := restoreWalkZip(dir, dest); err != nil { + _ = os.Remove(dest) + return "", err + } + return dest, nil +} + +// restoreWalkZip archives dir into a ZIP at dest, skipping common build/cache dirs. 
+func restoreWalkZip(dir, dest string) error { + out, err := os.Create(dest) //nolint:gosec // dest is a temp file path from os.CreateTemp + if err != nil { + return err + } + defer out.Close() + + zw := zip.NewWriter(out) + defer zw.Close() + + skipDirs := map[string]bool{ + ".git": true, "node_modules": true, "vendor": true, "__pycache__": true, + ".venv": true, "venv": true, "dist": true, "build": true, "target": true, + ".next": true, ".nuxt": true, "coverage": true, ".terraform": true, + } + + return filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + rel, err := filepath.Rel(dir, path) + if err != nil { + return err + } + if info.IsDir() { + if skipDirs[info.Name()] { + return filepath.SkipDir + } + return nil + } + if strings.HasPrefix(info.Name(), ".") || info.Size() > 10<<20 { + return nil + } + w, err := zw.Create(filepath.ToSlash(rel)) + if err != nil { + return err + } + return copyFileIntoZip(path, w) + }) +} + +// copyFileIntoZip opens path, copies its contents into w, then closes the file. +// Using an explicit Close (rather than defer) avoids accumulating open handles +// across all Walk iterations. +func copyFileIntoZip(path string, w io.Writer) error { + src, err := os.Open(path) //nolint:gosec // path is from filepath.Walk within dir + if err != nil { + return err + } + _, err = io.Copy(w, src) + src.Close() + return err +} + +// findGitRoot walks up from start to find the directory containing .git. +// Returns start itself if no .git directory is found. 
func findGitRoot(start string) string {
	dir := start
	for {
		// A .git entry (directory, or file for worktrees/submodules) marks the root.
		if _, err := os.Stat(filepath.Join(dir, ".git")); err == nil {
			return dir
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			// Reached the filesystem root without finding .git; fall back to
			// the caller-supplied directory.
			return start
		}
		dir = parent
	}
}

// Analyze uploads a repository ZIP and runs the full analysis pipeline,
// polling until the async job completes and returning the Graph.
func (c *Client) Analyze(ctx context.Context, zipPath, idempotencyKey string) (*Graph, error) {
	job, err := c.pollUntilComplete(ctx, zipPath, idempotencyKey)
	if err != nil {
		return nil, err
	}
	// job.Result is raw JSON; Analyze and AnalyzeDomains decode the same
	// payload into different shapes depending on what the caller needs.
	var result jobResult
	if err := json.Unmarshal(job.Result, &result); err != nil {
		return nil, fmt.Errorf("decode graph result: %w", err)
	}
	return &result.Graph, nil
}

// AnalyzeDomains uploads a repository ZIP and runs the full analysis pipeline,
// returning the complete SupermodelIR response (domains, summary, metadata, graph).
// Use this instead of Analyze when you need high-level domain information.
func (c *Client) AnalyzeDomains(ctx context.Context, zipPath, idempotencyKey string) (*SupermodelIR, error) {
	job, err := c.pollUntilComplete(ctx, zipPath, idempotencyKey)
	if err != nil {
		return nil, err
	}
	var ir SupermodelIR
	if err := json.Unmarshal(job.Result, &ir); err != nil {
		return nil, fmt.Errorf("decode domain result: %w", err)
	}
	return &ir, nil
}

// pollUntilComplete submits a ZIP to the analyze endpoint and polls until the
// async job reaches "completed" status, then returns the raw JobResponse.
+func (c *Client) pollUntilComplete(ctx context.Context, zipPath, idempotencyKey string) (*JobResponse, error) { + job, err := c.postZip(ctx, zipPath, idempotencyKey) + if err != nil { + return nil, err + } for job.Status == "pending" || job.Status == "processing" { wait := time.Duration(job.RetryAfter) * time.Second if wait <= 0 { @@ -55,25 +82,18 @@ func (c *Client) Analyze(ctx context.Context, zipPath, idempotencyKey string) (* return nil, ctx.Err() case <-time.After(wait): } - job, err = c.postZip(ctx, zipPath, idempotencyKey) if err != nil { return nil, err } } - if job.Error != nil { return nil, fmt.Errorf("analysis failed: %s", *job.Error) } if job.Status != "completed" { return nil, fmt.Errorf("unexpected job status: %s", job.Status) } - - var result jobResult - if err := json.Unmarshal(job.Result, &result); err != nil { - return nil, fmt.Errorf("decode graph result: %w", err) - } - return &result.Graph, nil + return job, nil } // postZip sends the repository ZIP to the analyze endpoint and returns the diff --git a/internal/api/types.go b/internal/api/types.go index e71b270..c041b6b 100644 --- a/internal/api/types.go +++ b/internal/api/types.go @@ -93,6 +93,57 @@ func (g *Graph) NodeByID(id string) (Node, bool) { return Node{}, false } +// SupermodelIR is the full structured response returned inside a completed job +// result from /v1/graphs/supermodel. It contains high-level domain information +// in addition to the raw node/edge graph captured by Graph. +type SupermodelIR struct { + Repo string `json:"repo"` + Summary map[string]any `json:"summary"` + Metadata IRMetadata `json:"metadata"` + Domains []IRDomain `json:"domains"` + Graph IRGraph `json:"graph"` +} + +// IRMetadata holds file-count and language statistics from the API response. +type IRMetadata struct { + FileCount int `json:"fileCount"` + Languages []string `json:"languages"` +} + +// IRGraph is the raw node/relationship sub-graph embedded in SupermodelIR. 
type IRGraph struct {
	Nodes         []IRNode         `json:"nodes"`
	Relationships []IRRelationship `json:"relationships"`
}

// IRNode is a single node in the IRGraph.
type IRNode struct {
	Type string `json:"type"`
	Name string `json:"name"`
}

// IRRelationship is a directed edge in the IRGraph.
// Source and Target presumably reference IRNode.Name values — TODO confirm
// against the API schema.
type IRRelationship struct {
	Type   string `json:"type"`
	Source string `json:"source"`
	Target string `json:"target"`
}

// IRDomain is the raw representation of a semantic domain from the API.
type IRDomain struct {
	Name               string        `json:"name"`
	DescriptionSummary string        `json:"descriptionSummary"`
	KeyFiles           []string      `json:"keyFiles"`
	Responsibilities   []string      `json:"responsibilities"`
	Subdomains         []IRSubdomain `json:"subdomains"`
}

// IRSubdomain is a named sub-area within an IRDomain.
type IRSubdomain struct {
	Name               string `json:"name"`
	DescriptionSummary string `json:"descriptionSummary"`
}

// JobResponse is the async envelope returned by the API for long-running jobs.
type JobResponse struct {
	Status string `json:"status"`

// Package restore implements the "supermodel restore" command: it builds a
// high-level project summary (a "context bomb") and writes it to stdout so
// that Claude Code can re-establish codebase understanding after a context
// compaction event.
//
// Graph data comes from two sources:
//   - API mode: calls /v1/graphs/supermodel and parses the full SupermodelIR
//     response into a ProjectGraph with semantic domains, critical files, and
//     external dependencies.
//   - Local mode: scans the repository file tree without any network calls,
//     grouping files by directory to produce a minimal ProjectGraph.
//
// The resulting ProjectGraph is rendered as Markdown with a configurable token
// budget (default 2 000 tokens) via Render.
package restore

package restore

import (
	"context"
	"encoding/json"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"
)

// deepDirs are top-level directories that should be grouped at two levels
// (dir/subdir) to preserve per-package granularity.
var deepDirs = map[string]bool{
	"internal": true, "src": true, "pkg": true, "lib": true, "app": true,
	"cmd": true, "pages": true, "routes": true, "components": true,
	"hooks": true, "store": true, "features": true, "views": true,
	"containers": true, "screens": true, "api": true, "controllers": true,
	"services": true, "middleware": true, "handlers": true,
}

// ignoreDirs are directory names excluded from the local scan.
// NOTE(review): generic names like "env", "out", and "build" are skipped
// wherever they appear — a source directory with one of these names will be
// missed; confirm this trade-off is intended.
var ignoreDirs = map[string]bool{
	".git": true, ".svn": true, ".hg": true, "node_modules": true,
	"vendor": true, "__pycache__": true, ".cache": true, "dist": true,
	"build": true, "target": true, ".tox": true, "venv": true,
	".venv": true, "coverage": true, ".nyc_output": true, "out": true,
	".next": true, ".nuxt": true, ".turbo": true, "Pods": true,
	"elm-stuff": true, "_build": true, "env": true,
}

// extToLanguage maps common file extensions to language display names.
// Extensions are matched after lowercasing (see collectFiles), so keys must
// be lowercase.
var extToLanguage = map[string]string{
	".go": "Go", ".js": "JavaScript", ".ts": "TypeScript", ".tsx": "TypeScript",
	".jsx": "JavaScript", ".py": "Python", ".rb": "Ruby", ".rs": "Rust",
	".java": "Java", ".kt": "Kotlin", ".swift": "Swift", ".cs": "C#",
	".cpp": "C++", ".c": "C", ".h": "C", ".php": "PHP", ".scala": "Scala",
	".elm": "Elm", ".ex": "Elixir", ".exs": "Elixir", ".sh": "Shell",
	".bash": "Shell", ".zig": "Zig", ".lua": "Lua", ".r": "R", ".jl": "Julia",
}

// BuildProjectGraph generates a ProjectGraph from local repository analysis
// with no external API calls.
+func BuildProjectGraph(ctx context.Context, rootDir, projectName string) (*ProjectGraph, error) { + extCounts, dirFiles, totalFiles, err := collectFiles(ctx, rootDir) + if err != nil { + return nil, err + } + lang, languages := detectLanguages(extCounts) + desc := readDescription(rootDir) + domains := buildDomains(dirFiles) + + g := &ProjectGraph{ + Name: projectName, + Language: lang, + Description: desc, + Domains: domains, + ExternalDeps: DetectExternalDeps(rootDir), + Stats: Stats{ + TotalFiles: totalFiles, + Languages: languages, + }, + UpdatedAt: time.Now(), + } + g.CriticalFiles = localTopFiles(g.Domains, 10) + return g, nil +} + +// ReadClaudeMD reads and returns the contents of CLAUDE.md from rootDir, +// truncated to 3 000 runes. Returns "" if the file is absent. +func ReadClaudeMD(rootDir string) string { + data, err := os.ReadFile(filepath.Join(rootDir, "CLAUDE.md")) + if err != nil { + return "" + } + content := strings.TrimSpace(string(data)) + const maxRunes = 3000 + runes := []rune(content) + if len(runes) > maxRunes { + content = string(runes[:maxRunes]) + "\n\n*(CLAUDE.md truncated — showing first 3000 chars)*" + } + return content +} + +// DetectExternalDeps scans rootDir for common dependency manifests and returns +// up to 15 top-level dependency names. Supports go.mod, package.json, +// requirements.txt, Cargo.toml, Gemfile, and pyproject.toml. 
func DetectExternalDeps(rootDir string) []string { //nolint:gocyclo // manifest-per-format parsing; splitting would obscure the intent
	const maxDeps = 15
	seen := make(map[string]bool)
	var deps []string
	// npm deps are buffered separately so structured manifests win the budget
	// (see the priority merge at the bottom).
	var npmRuntime, npmDev []string

	// add appends a name once, ignoring blanks and duplicates.
	add := func(name string) {
		name = strings.TrimSpace(name)
		if name == "" || seen[name] {
			return
		}
		seen[name] = true
		deps = append(deps, name)
	}

	// go.mod
	if data, err := os.ReadFile(filepath.Join(rootDir, "go.mod")); err == nil {
		inRequire := false
		ownModule := ""
		for _, line := range strings.Split(string(data), "\n") {
			t := strings.TrimSpace(line)
			if strings.HasPrefix(t, "module ") {
				if f := strings.Fields(t); len(f) >= 2 {
					ownModule = f[1]
				}
				continue
			}
			if t == "require (" {
				inRequire = true
				continue
			}
			if inRequire && t == ")" {
				inRequire = false
				continue
			}
			var mod string
			if strings.HasPrefix(t, "require ") {
				// Single-line form: require example.com/x v1.2.3
				if f := strings.Fields(t); len(f) >= 2 {
					mod = f[1]
				}
			} else if inRequire {
				// Block form: strip trailing comments (e.g. "// indirect").
				if i := strings.Index(t, "//"); i >= 0 {
					t = strings.TrimSpace(t[:i])
				}
				if f := strings.Fields(t); len(f) >= 1 {
					mod = f[0]
				}
			}
			if mod == "" || mod == ownModule {
				continue
			}
			// Keep only the last path segment as a human-friendly name.
			segs := strings.Split(mod, "/")
			add(segs[len(segs)-1])
		}
	}

	// package.json — split runtime vs devDeps so runtime gets priority.
	if data, err := os.ReadFile(filepath.Join(rootDir, "package.json")); err == nil {
		var pkg struct {
			Dependencies    map[string]json.RawMessage `json:"dependencies"`
			DevDependencies map[string]json.RawMessage `json:"devDependencies"`
		}
		if json.Unmarshal(data, &pkg) == nil {
			for name := range pkg.Dependencies {
				if name = strings.TrimSpace(name); name != "" {
					npmRuntime = append(npmRuntime, name)
				}
			}
			for name := range pkg.DevDependencies {
				if name = strings.TrimSpace(name); name != "" {
					npmDev = append(npmDev, name)
				}
			}
		}
	}

	// requirements.txt
	if data, err := os.ReadFile(filepath.Join(rootDir, "requirements.txt")); err == nil {
		for _, line := range strings.Split(string(data), "\n") {
			line = strings.TrimSpace(line)
			// "-" skips pip option lines such as -r, -e, --index-url.
			if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "-") {
				continue
			}
			name := line
			// Direct references: "name @ https://...".
			if i := strings.Index(name, " @ "); i >= 0 {
				name = name[:i]
			}
			// Strip environment markers, extras, and version specifiers.
			for _, sep := range []string{";", " #", "[", "==", ">=", "<=", "!=", "~=", ">", "<"} {
				if i := strings.Index(name, sep); i >= 0 {
					name = name[:i]
				}
			}
			add(name)
		}
	}

	// Cargo.toml
	if data, err := os.ReadFile(filepath.Join(rootDir, "Cargo.toml")); err == nil {
		inDeps := false
		// depth tracks multi-line inline tables ({ ... } spanning lines) so
		// their interior keys are not mistaken for dependency names.
		depth := 0
		for _, line := range strings.Split(string(data), "\n") {
			t := strings.TrimSpace(line)
			if strings.HasPrefix(t, "[") {
				inDeps = t == "[dependencies]" || t == "[dev-dependencies]" || t == "[build-dependencies]" ||
					t == "[workspace.dependencies]" || t == "[workspace.dev-dependencies]" || t == "[workspace.build-dependencies]"
				depth = 0
				continue
			}
			opens := strings.Count(t, "{")
			closes := strings.Count(t, "}")
			if inDeps && depth == 0 && strings.Contains(t, "=") && !strings.HasPrefix(t, "#") {
				parts := strings.SplitN(t, "=", 2)
				add(strings.TrimSpace(parts[0]))
			}
			depth += opens - closes
			if depth < 0 {
				depth = 0
			}
		}
	}

	// Gemfile
	if data, err := os.ReadFile(filepath.Join(rootDir, "Gemfile")); err == nil {
		for _, line := range strings.Split(string(data), "\n") {
			t := strings.TrimSpace(line)
			if !strings.HasPrefix(t, "gem ") && !strings.HasPrefix(t, "gem\t") {
				continue
			}
			rest := strings.TrimSpace(t[3:])
			// The gem name is the first single- or double-quoted string.
			for _, q := range []string{"'", `"`} {
				if strings.HasPrefix(rest, q) {
					if end := strings.Index(rest[1:], q); end >= 0 {
						add(rest[1 : end+1])
						break
					}
				}
			}
		}
	}

	// pyproject.toml
	if data, err := os.ReadFile(filepath.Join(rootDir, "pyproject.toml")); err == nil {
		inPoetryDeps := false
		inProjectSection := false
		inProjectDepsArray := false
		for _, line := range strings.Split(string(data), "\n") {
			t := strings.TrimSpace(line)
			if strings.HasPrefix(t, "[") {
				inPoetryDeps = t == "[tool.poetry.dependencies]" || t == "[tool.poetry.dev-dependencies]"
				inProjectSection = t == "[project]"
				if !inProjectSection {
					inProjectDepsArray = false
				}
				continue
			}
			if inProjectSection && !inProjectDepsArray {
				if strings.HasPrefix(t, "dependencies") && strings.Contains(t, "=") {
					eqIdx := strings.Index(t, "=")
					rest := strings.TrimSpace(t[eqIdx+1:])
					openIdx := strings.Index(rest, "[")
					closeIdx := strings.Index(rest, "]")
					if openIdx >= 0 && closeIdx > openIdx {
						// Single-line array: dependencies = ["a", "b"].
						for _, part := range strings.Split(rest[openIdx+1:closeIdx], ",") {
							dep := cleanPyDep(strings.Trim(part, `"', `))
							if dep != "" {
								add(dep)
							}
						}
					} else {
						// Multi-line array: entries follow on later lines.
						inProjectDepsArray = true
					}
					continue
				}
			}
			if inProjectDepsArray {
				if strings.HasPrefix(t, "]") {
					inProjectDepsArray = false
					continue
				}
				dep := cleanPyDep(strings.Trim(t, `"', `))
				if dep != "" && !strings.HasPrefix(dep, "#") {
					add(dep)
				}
				continue
			}
			if inPoetryDeps && strings.Contains(t, "=") && !strings.HasPrefix(t, "#") {
				parts := strings.SplitN(t, "=", 2)
				// Poetry lists the interpreter itself as "python"; skip it.
				if name := strings.TrimSpace(parts[0]); name != "python" {
					add(name)
				}
			}
		}
	}

	// Priority: non-npm manifest deps (go.mod, Cargo.toml, requirements.txt, etc.)
	// fill the budget first; npm runtime deps are appended if space remains, dev
	// deps last. This keeps the most-structured manifests dominant.
	sort.Strings(deps)
	if len(deps) > maxDeps {
		deps = deps[:maxDeps]
	}
	sort.Strings(npmRuntime)
	for _, name := range npmRuntime {
		if len(deps) >= maxDeps {
			break
		}
		add(name)
	}
	sort.Strings(npmDev)
	for _, name := range npmDev {
		if len(deps) >= maxDeps {
			break
		}
		add(name)
	}
	return deps
}

// cleanPyDep strips extras, version specifiers, environment markers, and
// trailing comments from a Python requirement string, returning the bare
// package name.
func cleanPyDep(dep string) string {
	for _, sep := range []string{";", " #", "[", ">=", "<=", "==", "!=", "~=", ">", "<"} {
		if i := strings.Index(dep, sep); i >= 0 {
			dep = dep[:i]
		}
	}
	return strings.TrimSpace(dep)
}

// collectFiles walks rootDir and returns extension counts, files per directory
// key, and total file count.
func collectFiles(ctx context.Context, rootDir string) (extCounts map[string]int, dirFiles map[string][]string, total int, err error) {
	extCounts = make(map[string]int)
	dirFiles = make(map[string][]string)

	walkErr := filepath.WalkDir(rootDir, func(path string, d fs.DirEntry, werr error) error {
		if werr != nil {
			return werr
		}
		// Honor cancellation between entries; large trees can take a while.
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		if d.IsDir() {
			name := d.Name()
			// NOTE(review): if rootDir itself is named like a dot-dir or an
			// ignored dir, the very first callback skips the whole walk —
			// confirm callers never pass such a root.
			if ignoreDirs[name] || strings.HasPrefix(name, ".") {
				return filepath.SkipDir
			}
			return nil
		}
		if d.Type()&fs.ModeSymlink != 0 {
			return nil
		}
		rel, rerr := filepath.Rel(rootDir, path)
		if rerr != nil {
			return nil
		}
		// Skip hidden files.
		if strings.HasPrefix(d.Name(), ".") {
			return nil
		}

		ext := strings.ToLower(filepath.Ext(path))
		if ext != "" {
			extCounts[ext]++
		}
		total++

		// Group by top-level directory, or two levels deep for deepDirs
		// ("" is the repo root itself).
		parts := strings.SplitN(rel, string(filepath.Separator), 3)
		dir := ""
		if len(parts) > 1 {
			dir = parts[0]
			if deepDirs[dir] && len(parts) > 2 {
				dir = parts[0] + string(filepath.Separator) + parts[1]
			}
		}
		dirFiles[dir] = append(dirFiles[dir], rel)
		return nil
	})
	return extCounts, dirFiles, total, walkErr
}

// detectLanguages aggregates per-extension counts into language totals and
// returns the dominant language plus up to five languages ordered by file
// count (ties broken alphabetically for determinism).
func detectLanguages(extCounts map[string]int) (primary string, languages []string) {
	langCounts := make(map[string]int)
	for ext, count := range extCounts {
		if lang, ok := extToLanguage[ext]; ok {
			langCounts[lang] += count
		}
	}
	type lc struct {
		lang  string
		count int
	}
	var sorted []lc
	for lang, count := range langCounts {
		sorted = append(sorted, lc{lang, count})
	}
	sort.Slice(sorted, func(i, j int) bool {
		if sorted[i].count != sorted[j].count {
			return sorted[i].count > sorted[j].count
		}
		return sorted[i].lang < sorted[j].lang
	})
	for _, item := range sorted {
		languages = append(languages, item.lang)
	}
	if len(languages) > 0 {
		primary = languages[0]
	}
	if len(languages) > 5 {
		languages = languages[:5]
	}
	return primary, languages
}

// readDescription extracts a one-line project description: the first plain
// prose line of the first README found, skipping headings, badges, tables,
// code fences, and horizontal rules. Returns "" when no suitable line exists.
func readDescription(rootDir string) string {
	for _, name := range []string{"README.md", "readme.md", "README.rst", "readme.rst", "README.txt"} {
		data, err := os.ReadFile(filepath.Join(rootDir, name))
		if err != nil {
			continue
		}
		for _, line := range strings.Split(strings.TrimSpace(string(data)), "\n") {
			line = strings.TrimSpace(line)
			if strings.HasPrefix(line, "#") || strings.HasPrefix(line, "[![") ||
				strings.HasPrefix(line, "![") || strings.HasPrefix(line, "|") ||
				strings.HasPrefix(line, "```") || strings.HasPrefix(line, "~~~") ||
				isHorizontalRule(line) {
				continue
			}
			// Keep it to a single reasonably short sentence-length line.
			if line != "" && len([]rune(line)) < 250 {
				return line
			}
		}
	}
	return ""
}

// isHorizontalRule reports whether line is a Markdown horizontal rule:
// three or more of the same character among '-', '*', '_', with optional
// spaces between them.
func isHorizontalRule(line string) bool {
	if len(line) < 3 {
		return false
	}
	var ch rune
	count := 0
	for _, c := range line {
		if c == ' ' {
			continue
		}
		if ch == 0 {
			if c != '-' && c != '*' && c != '_' {
				return false
			}
			ch = c
		} else if c != ch {
			return false
		}
		count++
	}
	return ch != 0 && count >= 3
}

// buildDomains converts the per-directory file index into up to 20 Domain
// entries (largest directories first), each carrying up to 8 key files ranked
// by entry-point priority, then shorter paths, then lexicographic order.
func buildDomains(dirFiles map[string][]string) []Domain {
	const maxKeyFiles = 8
	const maxDomains = 20

	var dirs []string
	for dir := range dirFiles {
		dirs = append(dirs, dir)
	}
	sort.Slice(dirs, func(i, j int) bool {
		ci, cj := len(dirFiles[dirs[i]]), len(dirFiles[dirs[j]])
		if ci != cj {
			return ci > cj
		}
		return dirs[i] < dirs[j]
	})
	if len(dirs) > maxDomains {
		dirs = dirs[:maxDomains]
	}

	var domains []Domain
	for _, dir := range dirs {
		files := dirFiles[dir]
		sort.Slice(files, func(i, j int) bool {
			pi, pj := entryPointPriority(files[i]), entryPointPriority(files[j])
			if pi != pj {
				return pi > pj
			}
			li, lj := len(files[i]), len(files[j])
			if li != lj {
				return li < lj
			}
			return files[i] < files[j]
		})
		keyFiles := files
		if len(keyFiles) > maxKeyFiles {
			keyFiles = keyFiles[:maxKeyFiles]
		}
		name := dir
		if name == "" {
			// "" is the grouping key for files at the repository root.
			name = "Root"
		}
		domains = append(domains, Domain{
			Name:        name,
			Description: fmt.Sprintf("%d file(s)", len(files)),
			KeyFiles:    keyFiles,
		})
	}
	return domains
}

// localTopFiles picks the top files across all domains by entry-point priority.
// In local mode RelationshipCount is always 0 (no cross-domain data available).
+func localTopFiles(domains []Domain, n int) []CriticalFile { + seen := make(map[string]struct{}) + var files []CriticalFile + for i := range domains { + for _, f := range domains[i].KeyFiles { + if _, ok := seen[f]; ok { + continue + } + seen[f] = struct{}{} + files = append(files, CriticalFile{Path: f}) + } + } + sort.Slice(files, func(i, j int) bool { + pi, pj := entryPointPriority(files[i].Path), entryPointPriority(files[j].Path) + if pi != pj { + return pi > pj + } + li, lj := len(files[i].Path), len(files[j].Path) + if li != lj { + return li < lj + } + return files[i].Path < files[j].Path + }) + if len(files) > n { + files = files[:n] + } + return files +} + +func entryPointPriority(path string) int { + base := filepath.Base(path) + name := strings.TrimSuffix(base, filepath.Ext(base)) + switch strings.ToLower(name) { + case "main": + return 4 + case "app", "application": + return 3 + case "server", "index": + return 2 + case "init", "__init__": + return 1 + } + return 0 +} diff --git a/internal/restore/render.go b/internal/restore/render.go new file mode 100644 index 0000000..08f0fb1 --- /dev/null +++ b/internal/restore/render.go @@ -0,0 +1,310 @@ +package restore + +import ( + "bytes" + "fmt" + "strings" + gotmpl "text/template" + "time" + "unicode" +) + +const maxCyclesToShow = 10 + +// DefaultMaxTokens is the default token budget for the rendered context bomb. 
const DefaultMaxTokens = 2000

// rendererTmpl is the compiled context-bomb template. Helper funcs:
// join/languageList concatenate string slices, add1 converts 0-based range
// indices to 1-based list numbers, blockquote prefixes continuation lines
// with "> " for Markdown quoting.
var rendererTmpl = gotmpl.Must(
	gotmpl.New("context_bomb").Funcs(gotmpl.FuncMap{
		"join":         strings.Join,
		"languageList": func(langs []string) string { return strings.Join(langs, ", ") },
		"add1":         func(i int) int { return i + 1 },
		"blockquote": func(s string) string {
			s = strings.TrimRight(s, "\r\n")
			return strings.ReplaceAll(s, "\n", "\n> ")
		},
	}).Parse(contextBombTmpl),
)

// contextBombTmpl is the full-output Markdown template. Backticks inside the
// raw string are spliced in via concatenation; "{{-" trims the preceding
// newline so optional sections collapse cleanly when absent.
const contextBombTmpl = `# Supermodel Context — {{.ProjectName}}

> Restored by ` + "`supermodel restore`" + ` at {{.Timestamp}}{{if .LocalMode}} | local mode (run ` + "`supermodel login`" + ` for AI-powered features){{end}}{{if .Stale}} | ⚠️ STALE: last updated {{.StaleDuration}}{{end}}
{{- if .Graph.Stats.CircularDependencyCycles}}
> ⚠️ {{.Graph.Stats.CircularDependencyCycles}} circular dependency {{if eq .Graph.Stats.CircularDependencyCycles 1}}cycle{{else}}cycles{{end}} detected{{range .CappedCycles}}
> - {{join .Cycle " → "}}{{end}}{{if .ExtraCycles}}
> ... and {{.ExtraCycles}} more{{end}}
{{- end}}
{{- if and (not .LocalMode) (not .Graph.CircularDepsAnalyzed)}}
> ⚠️ Circular dependency analysis unavailable
{{- end}}

## Project Overview

**Name:** {{.Graph.Name}}
**Language:** {{.Graph.Language}}{{if .Graph.Framework}}
**Framework:** {{.Graph.Framework}}{{end}}{{if .Graph.Description}}
**Description:** {{.Graph.Description}}{{end}}
**Codebase:** {{.Graph.Stats.TotalFiles}} files · {{.Graph.Stats.TotalFunctions}} functions
{{- if .Graph.Stats.Languages}}

**Languages:** {{languageList .Graph.Stats.Languages}}{{end}}{{if .Graph.ExternalDeps}}
**Tech stack:** {{join .Graph.ExternalDeps ", "}}{{end}}{{if .Graph.CriticalFiles}}
## Critical Files
{{range $i, $f := .Graph.CriticalFiles}}{{add1 $i}}. {{$f.Path}}{{if $f.RelationshipCount}} — {{$f.RelationshipCount}} relationships{{end}}
{{end}}{{end}}
## Domain Map
{{range .Graph.Domains}}
### {{.Name}}
{{.Description}}
{{if .KeyFiles}}**Key files:** {{join .KeyFiles ", "}}
{{end}}{{if .Responsibilities}}**Responsibilities:**
{{range .Responsibilities}}- {{.}}
{{end}}{{end}}{{if .Subdomains}}**Subdomains:**
{{range .Subdomains}}- {{.Name}}{{if .Description}}: {{.Description}}{{end}}
{{end}}{{end}}{{if .DependsOn}}**Depends on:** {{join .DependsOn ", "}}
{{end}}{{end}}{{- if .ClaudeMD}}
## Project Instructions (CLAUDE.md)

{{.ClaudeMD}}
{{- end}}
---
*Generated by [supermodel restore](https://supermodeltools.com)*`

// RenderOptions controls the context bomb output.
type RenderOptions struct {
	// MaxTokens is the token budget. Defaults to DefaultMaxTokens when zero.
	MaxTokens int
	// Stale indicates the graph data is older than the TTL.
	Stale bool
	// StaleAt is when the graph was last updated; used with Stale to show age.
	StaleAt *time.Time
	// ClaudeMD is the raw content of CLAUDE.md in the project root (may be "").
	ClaudeMD string
	// LocalMode causes an informational banner to be shown (no API key used).
	LocalMode bool
}

// Render produces the context bomb Markdown, respecting the token budget.
// Returns the rendered text, estimated token count, and any template error.
+func Render(graph *ProjectGraph, projectName string, opts RenderOptions) (output string, tokens int, err error) { + if graph == nil { + return "", 0, fmt.Errorf("render: graph is nil") + } + if opts.MaxTokens <= 0 { + opts.MaxTokens = DefaultMaxTokens + } + + now := time.Now().UTC() + staleDuration := "" + if opts.Stale && opts.StaleAt != nil { + staleDuration = humanDuration(now.Sub(*opts.StaleAt)) + } + + cappedCycles := graph.Cycles + extraCycles := 0 + if len(graph.Cycles) > maxCyclesToShow { + cappedCycles = graph.Cycles[:maxCyclesToShow] + extraCycles = len(graph.Cycles) - maxCyclesToShow + } + + data := struct { + ProjectName string + Timestamp string + Graph *ProjectGraph + CappedCycles []CircularDependencyCycle + ExtraCycles int + Stale bool + StaleDuration string + ClaudeMD string + LocalMode bool + }{ + ProjectName: projectName, + Timestamp: now.Format("2006-01-02 15:04:05 UTC"), + Graph: graph, + CappedCycles: cappedCycles, + ExtraCycles: extraCycles, + Stale: opts.Stale, + StaleDuration: staleDuration, + ClaudeMD: opts.ClaudeMD, + LocalMode: opts.LocalMode, + } + + var full bytes.Buffer + if err := rendererTmpl.Execute(&full, data); err != nil { + return "", 0, fmt.Errorf("rendering template: %w", err) + } + + output = full.String() + tokens = CountTokens(output) + if tokens <= opts.MaxTokens { + return output, tokens, nil + } + return truncateToTokenBudget(graph, projectName, opts) +} + +// truncateToTokenBudget progressively drops lower-priority content to fit. 
func truncateToTokenBudget(graph *ProjectGraph, projectName string, opts RenderOptions) (output string, tokens int, err error) { //nolint:gocyclo // progressive truncation strategy; splitting would obscure priority ordering
	now := time.Now().UTC()
	// Recompute the staleness age: this path does not reuse Render's locals.
	staleDuration := ""
	if opts.Stale && opts.StaleAt != nil {
		staleDuration = humanDuration(now.Sub(*opts.StaleAt))
	}

	// The header (title, banner, cycle warning, one-line stats) is mandatory:
	// either it fits in full or the budget is declared too small below.
	var hdr strings.Builder
	fmt.Fprintf(&hdr, "# Supermodel Context — %s\n\n", projectName)
	banner := fmt.Sprintf("> Restored by `supermodel restore` at %s", now.Format("2006-01-02 15:04:05 UTC"))
	if opts.LocalMode {
		banner += " | local mode (run `supermodel login` for AI-powered features)"
	}
	if opts.Stale && staleDuration != "" {
		banner += fmt.Sprintf(" | ⚠️ STALE: last updated %s", staleDuration)
	}
	hdr.WriteString(banner + "\n")
	if graph.Stats.CircularDependencyCycles > 0 {
		// Pick the singular/plural label for the cycle count.
		label := "cycles"
		if graph.Stats.CircularDependencyCycles == 1 {
			label = "cycle"
		}
		fmt.Fprintf(&hdr, "> ⚠️ %d circular dependency %s detected\n", graph.Stats.CircularDependencyCycles, label)
	}
	fmt.Fprintf(&hdr,
		"\n**Language:** %s · **Files:** %d · **Functions:** %d",
		graph.Language, graph.Stats.TotalFiles, graph.Stats.TotalFunctions,
	)
	required := hdr.String()

	reqTokens := CountTokens(required)
	if reqTokens > opts.MaxTokens {
		// Even the mandatory header exceeds the budget; emit a minimal stub.
		fb := "# Supermodel Context\n\n(Budget too small; increase --max-tokens)"
		return fb, CountTokens(fb), nil
	}
	remaining := opts.MaxTokens - reqTokens

	var sb strings.Builder
	sb.WriteString(required)

	// Priority 1: critical files, added line by line until the budget runs out.
	if len(graph.CriticalFiles) > 0 {
		header := "\n\n## Critical Files\n"
		if ht := CountTokens(header); ht <= remaining {
			sb.WriteString(header)
			remaining -= ht
			for i, f := range graph.CriticalFiles {
				var line string
				if f.RelationshipCount > 0 {
					line = fmt.Sprintf("%d. %s — %d relationships\n", i+1, f.Path, f.RelationshipCount)
				} else {
					line = fmt.Sprintf("%d. %s\n", i+1, f.Path)
				}
				lt := CountTokens(line)
				if lt > remaining {
					// The first line that does not fit ends the section; the
					// remaining (lower-ranked) files are dropped with it.
					break
				}
				sb.WriteString(line)
				remaining -= lt
			}
		}
	}

	// Priority 2: domain sections, each accepted whole-or-not against a
	// sub-budget that excludes the section header's own cost.
	header := "\n\n## Domain Map\n"
	if ht := CountTokens(header); ht <= remaining {
		sectionBudget := remaining - ht
		var sections []string
		for i := range graph.Domains {
			s := buildDomainSection(&graph.Domains[i])
			if st := CountTokens(s); st <= sectionBudget {
				sections = append(sections, s)
				sectionBudget -= st
			}
			// Stop scanning once the leftover budget is negligible.
			if sectionBudget < 50 {
				break
			}
		}
		// Only emit the header when at least one section fit.
		if len(sections) > 0 {
			sb.WriteString(header)
			remaining = sectionBudget
			for _, s := range sections {
				sb.WriteString(s)
			}
		}
	}

	// Priority 3: CLAUDE.md, included only when it fits in full (never truncated).
	if opts.ClaudeMD != "" {
		header := "\n\n## Project Instructions (CLAUDE.md)\n\n"
		ht := CountTokens(header)
		ct := CountTokens(opts.ClaudeMD)
		if ht+ct <= remaining {
			sb.WriteString(header)
			sb.WriteString(opts.ClaudeMD)
			sb.WriteString("\n")
			remaining -= ht + ct + 1
		}
	}

	// The footer is purely cosmetic; drop it silently when over budget.
	footer := "\n\n---\n*Generated by supermodel restore (budget-truncated)*"
	if CountTokens(footer) <= remaining {
		sb.WriteString(footer)
	}

	result := sb.String()
	return result, CountTokens(result), nil
}

// buildDomainSection renders one "### <name>" Markdown section for d,
// mirroring the Domain Map portion of the full template.
func buildDomainSection(d *Domain) string {
	var sb strings.Builder
	fmt.Fprintf(&sb, "\n### %s\n%s\n", d.Name, d.Description)
	if len(d.KeyFiles) > 0 {
		fmt.Fprintf(&sb, "**Key files:** %s\n", strings.Join(d.KeyFiles, ", "))
	}
	if len(d.Responsibilities) > 0 {
		sb.WriteString("**Responsibilities:**\n")
		for _, r := range d.Responsibilities {
			fmt.Fprintf(&sb, "- %s\n", r)
		}
	}
	if len(d.Subdomains) > 0 {
		sb.WriteString("**Subdomains:**\n")
		for _, s := range d.Subdomains {
			if s.Description != "" {
				fmt.Fprintf(&sb, "- %s: %s\n", s.Name, s.Description)
			} else {
				fmt.Fprintf(&sb, "- %s\n", s.Name)
			}
		}
	}
	if len(d.DependsOn) > 0 {
		fmt.Fprintf(&sb, "**Depends on:** %s\n", strings.Join(d.DependsOn, ", "))
	}
	return sb.String()
}

// CountTokens estimates the LLM token count of text using 
max(chars/4, words*4/3).
// The character estimate (~4 chars/token) dominates for typical prose; the
// word estimate (~1.33 tokens/word) guards against whitespace-light input.
func CountTokens(text string) int {
	// Count whitespace-separated words in a single pass over the runes.
	words := 0
	inWord := false
	for _, r := range text {
		if unicode.IsSpace(r) {
			inWord = false
		} else if !inWord {
			words++
			inWord = true
		}
	}
	charEstimate := len(text) / 4   // len is bytes, not runes — close enough for an estimate
	wordEstimate := words * 100 / 75 // integer form of words*4/3
	if charEstimate > wordEstimate {
		return charEstimate
	}
	return wordEstimate
}

// humanDuration formats d at coarse, human-friendly granularity
// (seconds → minutes → hours → days).
// NOTE(review): singular values render as "1 seconds"/"1 minutes" — confirm acceptable.
func humanDuration(d time.Duration) string {
	if d < time.Minute {
		return fmt.Sprintf("%d seconds", int(d.Seconds()))
	}
	if d < time.Hour {
		return fmt.Sprintf("%d minutes", int(d.Minutes()))
	}
	if d < 24*time.Hour {
		return fmt.Sprintf("%.1f hours", d.Hours())
	}
	return fmt.Sprintf("%.1f days", d.Hours()/24)
}
diff --git a/internal/restore/types.go b/internal/restore/types.go
new file mode 100644
index 0000000..bd73b92
--- /dev/null
+++ b/internal/restore/types.go
@@ -0,0 +1,161 @@
package restore

import (
	"sort"
	"time"

	"github.com/supermodeltools/cli/internal/api"
)

// ProjectGraph is the processed project model used for rendering.
// It is derived from either a SupermodelIR API response or a local file scan.
type ProjectGraph struct {
	Name                 string                    `json:"name"`
	Language             string                    `json:"language"`
	Framework            string                    `json:"framework,omitempty"`
	Description          string                    `json:"description,omitempty"`
	Domains              []Domain                  `json:"domains"`
	ExternalDeps         []string                  `json:"external_deps,omitempty"`
	CriticalFiles        []CriticalFile            `json:"critical_files,omitempty"`
	Stats                Stats                     `json:"stats"`
	Cycles               []CircularDependencyCycle `json:"cycles,omitempty"`
	CircularDepsAnalyzed bool                      `json:"circular_deps_analyzed"`
	// UpdatedAt records when this graph was built (set by FromSupermodelIR).
	UpdatedAt time.Time `json:"updated_at"`
}

// Domain is a semantic area of the codebase. 
type Domain struct {
	Name             string      `json:"name"`
	Description      string      `json:"description"`
	KeyFiles         []string    `json:"key_files"`
	Responsibilities []string    `json:"responsibilities"`
	Subdomains       []Subdomain `json:"subdomains,omitempty"`
	DependsOn        []string    `json:"depends_on,omitempty"`
}

// Subdomain is a named sub-area within a Domain.
type Subdomain struct {
	Name        string `json:"name"`
	Description string `json:"description,omitempty"`
}

// CriticalFile is a highly-referenced file derived from domain key file counts.
type CriticalFile struct {
	Path string `json:"path"`
	// RelationshipCount is the number of domains listing Path as a key file
	// (see computeCriticalFiles).
	RelationshipCount int `json:"relationship_count"`
}

// Stats holds aggregate codebase statistics.
type Stats struct {
	TotalFiles               int      `json:"total_files"`
	TotalFunctions           int      `json:"total_functions"`
	Languages                []string `json:"languages,omitempty"`
	CircularDependencyCycles int      `json:"circular_dependency_cycles,omitempty"`
}

// CircularDependencyCycle is a single circular import chain.
type CircularDependencyCycle struct {
	Cycle []string `json:"cycle"`
}

// FromSupermodelIR converts the raw API response into a ProjectGraph.
// NOTE(review): ir is dereferenced without a nil check — callers must pass non-nil.
func FromSupermodelIR(ir *api.SupermodelIR, projectName string) *ProjectGraph {
	// Primary language: first metadata language, overridden by the summary's
	// "primaryLanguage" entry when present and non-empty.
	lang := ""
	if len(ir.Metadata.Languages) > 0 {
		lang = ir.Metadata.Languages[0]
	}
	if v, ok := ir.Summary["primaryLanguage"]; ok {
		if s, ok := v.(string); ok && s != "" {
			lang = s
		}
	}

	// summaryInt reads an integer stat from the summary map; values are
	// asserted as float64 (numbers decoded from JSON into any arrive as
	// float64). Missing or non-numeric keys yield 0.
	summaryInt := func(key string) int {
		if v, ok := ir.Summary[key]; ok {
			if n, ok := v.(float64); ok {
				return int(n)
			}
		}
		return 0
	}

	// Build domain → dependsOn map from DOMAIN_RELATES edges.
	dependsOn := make(map[string][]string)
	for _, rel := range ir.Graph.Relationships {
		if rel.Type == "DOMAIN_RELATES" && rel.Source != "" && rel.Target != "" {
			dependsOn[rel.Source] = append(dependsOn[rel.Source], rel.Target)
		}
	}

	// Flatten each IR domain (and its subdomains) into the local Domain type.
	domains := make([]Domain, 0, len(ir.Domains))
	for _, d := range ir.Domains {
		subs := make([]Subdomain, 0, len(d.Subdomains))
		for _, s := range d.Subdomains {
			subs = append(subs, Subdomain{Name: s.Name, Description: s.DescriptionSummary})
		}
		domains = append(domains, Domain{
			Name:             d.Name,
			Description:      d.DescriptionSummary,
			KeyFiles:         d.KeyFiles,
			Responsibilities: d.Responsibilities,
			Subdomains:       subs,
			DependsOn:        dependsOn[d.Name],
		})
	}

	// Collect named ExternalDependency nodes from the graph.
	// NOTE(review): no de-duplication here — presumably node names are unique
	// in the IR; verify against the API contract.
	var extDeps []string
	for _, node := range ir.Graph.Nodes {
		if node.Type == "ExternalDependency" && node.Name != "" {
			extDeps = append(extDeps, node.Name)
		}
	}

	g := &ProjectGraph{
		Name:         projectName,
		Language:     lang,
		Domains:      domains,
		ExternalDeps: extDeps,
		Stats: Stats{
			TotalFiles:     summaryInt("filesProcessed"),
			TotalFunctions: summaryInt("functions"),
			Languages:      ir.Metadata.Languages,
		},
		UpdatedAt: time.Now(),
	}
	// Derive the top 10 critical files from the domains just built.
	g.CriticalFiles = computeCriticalFiles(g.Domains, 10)
	return g
}

// computeCriticalFiles derives the most-referenced files by counting how many
// domains list each file as a key file. The top n files are returned.
+func computeCriticalFiles(domains []Domain, n int) []CriticalFile { + if n <= 0 { + return nil + } + counts := make(map[string]int) + for i := range domains { + d := &domains[i] + seen := make(map[string]struct{}, len(d.KeyFiles)) + for _, f := range d.KeyFiles { + if _, exists := seen[f]; exists { + continue + } + seen[f] = struct{}{} + counts[f]++ + } + } + files := make([]CriticalFile, 0, len(counts)) + for path, count := range counts { + files = append(files, CriticalFile{Path: path, RelationshipCount: count}) + } + sort.Slice(files, func(i, j int) bool { + if files[i].RelationshipCount != files[j].RelationshipCount { + return files[i].RelationshipCount > files[j].RelationshipCount + } + return files[i].Path < files[j].Path + }) + if len(files) > n { + files = files[:n] + } + return files +}