From c26d3f9ed031276df9130ea9e275ae313d004f1f Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Tue, 10 Mar 2026 11:50:55 +0000
Subject: [PATCH 01/16] chore: install ripgrep

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 scripts/services/docker/Dockerfile.git_integration | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/services/docker/Dockerfile.git_integration b/scripts/services/docker/Dockerfile.git_integration
index 84895879d1..4c9c371007 100644
--- a/scripts/services/docker/Dockerfile.git_integration
+++ b/scripts/services/docker/Dockerfile.git_integration
@@ -82,6 +82,7 @@ FROM base AS runner
 RUN apt-get update && apt-get install -y \
     ca-certificates \
     git \
+    ripgrep \
     --no-install-recommends \
     && rm -rf /var/lib/apt/lists/* \
     && apt-get clean \

From e1f8dad3623d5392934d889cfab654e7dd997295 Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Tue, 10 Mar 2026 12:47:26 +0000
Subject: [PATCH 02/16] feat: leverage maintainersFile from db before falling
 back to regular detection

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../services/maintainer/maintainer_service.py | 94 +++++++++++++++----
 1 file changed, 78 insertions(+), 16 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index 1734dd75e6..41a58d6d80 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -397,9 +397,76 @@ async def find_maintainer_file(self, repo_path: str, owner: str, repo: str):
 
         return None, None, ai_cost
 
-    async def extract_maintainers(self, repo_path: str, owner: str, repo: str):
+    async def analyze_and_build_result(self, filename: str, content: str) -> MaintainerResult:
+        """
+        Analyze file content with AI and return a MaintainerResult.
+        Raises MaintanerAnalysisError if no maintainers are found.
+        """
+        self.logger.info(f"Analyzing maintainer file: {filename}")
+        result = await self.analyze_file_content(filename, content)
+
+        if not result.output.info:
+            raise MaintanerAnalysisError(ai_cost=result.cost)
+
+        return MaintainerResult(
+            maintainer_file=filename,
+            maintainer_info=result.output.info,
+            total_cost=result.cost,
+        )
+
+    async def try_saved_maintainer_file(
+        self, repo_path: str, saved_maintainer_file: str
+    ) -> tuple[MaintainerResult | None, float]:
+        """
+        Attempt to read and analyze the previously saved maintainer file.
+        Returns (result, cost) where result is None if the attempt failed.
+        """
+        cost = 0.0
+        file_path = os.path.join(repo_path, saved_maintainer_file)
+
+        if not await aiofiles.os.path.isfile(file_path):
+            self.logger.warning(
+                f"Saved maintainer file '{saved_maintainer_file}' no longer exists on disk"
+            )
+            return None, cost
+
+        try:
+            async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
+                content = await f.read()
+
+            result = await self.analyze_and_build_result(saved_maintainer_file, content)
+            cost += result.total_cost
+            return result, cost
+        except MaintanerAnalysisError as e:
+            cost += e.ai_cost
+            self.logger.warning(
+                f"Saved maintainer file '{saved_maintainer_file}' analysis failed: {e.error_message}"
+            )
+            return None, cost
+        except Exception as e:
+            self.logger.warning(
+                f"Saved maintainer file '{saved_maintainer_file}' processing failed: {repr(e)}"
+            )
+            return None, cost
+
+    async def extract_maintainers(
+        self,
+        repo_path: str,
+        owner: str,
+        repo: str,
+        saved_maintainer_file: str | None = None,
+    ):
         total_cost = 0
 
+        if saved_maintainer_file:
+            self.logger.info(f"Trying saved maintainer file: {saved_maintainer_file}")
+            result, cost = await self.try_saved_maintainer_file(repo_path, saved_maintainer_file)
+            total_cost += cost
+            if result:
+                result.total_cost = total_cost
+                return result
+            self.logger.info("Falling back to maintainer file detection")
+
         self.logger.info("Looking for maintainer file...")
         maintainer_file, file_content, cost = await self.find_maintainer_file(
             repo_path, owner, repo
@@ -411,21 +478,11 @@ async def extract_maintainers(self, repo_path: str, owner: str, repo: str):
             raise MaintainerFileNotFoundError(ai_cost=total_cost)
 
         decoded_content = base64.b64decode(file_content).decode("utf-8")
+        result = await self.analyze_and_build_result(maintainer_file, decoded_content)
+        total_cost += result.total_cost
 
-        self.logger.info(f"Analyzing maintainer file: {maintainer_file}")
-        result = await self.analyze_file_content(maintainer_file, decoded_content)
-        maintainer_info = result.output.info
-        total_cost += result.cost
-
-        if not maintainer_info:
-            self.logger.error("Failed to analyze the maintainer file content.")
-            raise MaintanerAnalysisError(ai_cost=total_cost)
-
-        return MaintainerResult(
-            maintainer_file=maintainer_file,
-            maintainer_info=maintainer_info,
-            total_cost=total_cost,
-        )
+        result.total_cost = total_cost
+        return result
 
     async def check_if_interval_elapsed(self, repository: Repository) -> tuple[bool, float]:
         """
@@ -514,7 +571,12 @@ async def process_maintainers(
                 )
 
             self.logger.info(f"Starting maintainers processing for repo: {batch_info.remote}")
-            maintainers = await self.extract_maintainers(batch_info.repo_path, owner, repo_name)
+            maintainers = await self.extract_maintainers(
+                batch_info.repo_path,
+                owner,
+                repo_name,
+                saved_maintainer_file=repository.maintainer_file,
+            )
             latest_maintainer_file = maintainers.maintainer_file
             ai_cost = maintainers.total_cost
             maintainers_found = len(maintainers.maintainer_info)

From e31275bcaa3f457b7edca682fef92a235dc427bd Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Tue, 10 Mar 2026 15:31:06 +0000
Subject: [PATCH 03/16] feat: improve maintainers detection & analysis

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../services/maintainer/maintainer_service.py | 244 ++++++++++++++----
 1 file changed, 192 insertions(+), 52 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index 41a58d6d80..da0f995950 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -1,5 +1,4 @@
 import asyncio
-import base64
 import os
 import time as time_module
 from datetime import datetime, time, timezone
@@ -20,6 +19,7 @@
 )
 from crowdgit.enums import ErrorCode, ExecutionStatus, OperationType
 from crowdgit.errors import (
+    CommandExecutionError,
     CrowdGitError,
     MaintainerFileNotFoundError,
     MaintainerIntervalNotElapsedError,
@@ -37,7 +37,7 @@
 from crowdgit.models.service_execution import ServiceExecution
 from crowdgit.services.base.base_service import BaseService
 from crowdgit.services.maintainer.bedrock import invoke_bedrock
-from crowdgit.services.utils import parse_repo_url
+from crowdgit.services.utils import run_shell_command
 from crowdgit.settings import MAINTAINER_RETRY_INTERVAL_DAYS, MAINTAINER_UPDATE_INTERVAL_HOURS
 
 
@@ -46,24 +46,60 @@ class MaintainerService(BaseService):
 
     MAX_CHUNK_SIZE = 5000
     MAX_CONCURRENT_CHUNKS = 3  # Maximum concurrent chunk processing
+    MAX_AI_ANALYSIS_ATTEMPTS = 3
+
     # List of common maintainer file names
     MAINTAINER_FILES = [
         "MAINTAINERS",
         "MAINTAINERS.md",
         "MAINTAINER.md",
+        "CODEOWNERS",
         "CODEOWNERS.md",
         "CONTRIBUTORS",
         "CONTRIBUTORS.md",
-        "docs/MAINTAINERS.md",
         "OWNERS",
-        "CODEOWNERS",
+        "OWNERS.md",
+        "AUTHORS",
+        "AUTHORS.md",
+        "docs/MAINTAINERS.md",
         ".github/MAINTAINERS.md",
         ".github/CONTRIBUTORS.md",
+        ".github/CODEOWNERS",
         "GOVERNANCE.md",
-        "README.md",
-        "SECURITY-INSIGHTS.md",
     ]
 
+    VALID_EXTENSIONS = {
+        "",
+        ".md",
+        ".markdown",
+        ".txt",
+        ".rst",
+        ".yaml",
+        ".yml",
+        ".toml",
+        ".adoc",
+        ".csv",
+    }
+
+    CONTENT_VALIDATION_KEYWORDS = [
+        "maintainer",
+        "codeowner",
+        "owner",
+        "contributor",
+        "author",
+        "reviewer",
+        "governance",
+        "lead",
+        "approver",
+        "committer",
+        "credit",
+        "administrator",
+        "steward",
+        "emeritus",
+    ]
+
+    EXCLUDED_FILENAMES = {"contributing.md", "contributing"}
+
     def make_role(self, title: str):
         title = title.lower()
         title = (
@@ -358,44 +394,122 @@ async def find_maintainer_file_with_ai(self, file_names):
         else:
             return None, result.cost
 
-    async def find_maintainer_file(self, repo_path: str, owner: str, repo: str):
-        self.logger.info(f"Looking for maintainer files in {owner}/{repo}...")
-
-        file_names = await aiofiles.os.listdir(repo_path)
-
-        for file in self.MAINTAINER_FILES:
-            file_path = os.path.join(repo_path, file)
-            if await aiofiles.os.path.isfile(file_path):
-                self.logger.info(f"maintainer file: {file_path} found in repo")
-                async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
-                    content = await f.read()
+    async def _list_repo_files(self, repo_path: str) -> list[str]:
+        """List all files in the repo recursively, respecting .gitignore via rg."""
+        try:
+            output = await run_shell_command(
+                ["rg", "--files", "--hidden", "--glob", "!.git/", "."], cwd=repo_path
+            )
+            return [
+                line[2:] if line.startswith("./") else line
+                for line in output.strip().split("\n")
+                if line.strip()
+            ]
+        except Exception as e:
+            self.logger.warning(f"rg --files failed, falling back to os.walk: {repr(e)}")
+            results = []
+            for dirpath, dirnames, filenames in os.walk(repo_path):
+                dirnames[:] = [d for d in dirnames if d != ".git"]
+                for filename in filenames:
+                    full_path = os.path.join(dirpath, filename)
+                    results.append(os.path.relpath(full_path, repo_path))
+            return results
+
+    async def _ripgrep_search(self, repo_path: str) -> list[str]:
+        """Search for files containing maintainer-related keywords, filtered to valid extensions."""
+        pattern = "|".join(self.CONTENT_VALIDATION_KEYWORDS)
+
+        exclusion_globs = ["--glob", "!.git/"]
+        for name in self.EXCLUDED_FILENAMES:
+            exclusion_globs.extend(["--iglob", f"!{name}"])
 
-                if file.lower() == "readme.md" and "maintainer" not in content.lower():
-                    self.logger.info(f"Skipping {file}: no maintainer-related content found")
-                    continue
+        try:
+            output = await run_shell_command(
+                ["rg", "-l", "-i", "--hidden", pattern, *exclusion_globs, "."], cwd=repo_path
+            )
+        except CommandExecutionError:
+            self.logger.info("Ripgrep found no files containing maintainer keywords")
+            return []
+        except Exception as e:
+            self.logger.warning(f"Ripgrep search failed: {repr(e)}")
+            return []
 
-                return file, base64.b64encode(content.encode()).decode(), 0
+        results = []
+        for line in output.strip().split("\n"):
+            line = line.strip()
+            if not line:
+                continue
+            if line.startswith("./"):
+                line = line[2:]
+            basename = os.path.basename(line).lower()
+            ext = os.path.splitext(basename)[1]
+            if ext not in self.VALID_EXTENSIONS:
+                self.logger.debug(f"Skipping '{line}': extension '{ext}' not in valid extensions")
+                continue
+            if ext == "" and not any(kw in basename for kw in self.CONTENT_VALIDATION_KEYWORDS):
+                self.logger.debug(
+                    f"Skipping extensionless file '{line}': "
+                    f"basename '{basename}' contains no governance keyword"
+                )
+                continue
+            results.append(line)
 
-        self.logger.warning("No maintainer files found using the known file names.")
+        self.logger.info(f"Ripgrep found {len(results)} candidate files after filtering")
+        return results
 
-        file_name, ai_cost = await self.find_maintainer_file_with_ai(file_names)
+    async def find_candidate_files(self, repo_path: str) -> list[tuple[str, str]]:
+        """
+        Find all potential maintainer files using static list + dynamic ripgrep search.
+        Returns ordered list of (relative_path, content) tuples.
+        Static matches come first, then dynamic matches sorted by content keyword score.
+        """
+        candidates_static = []
+        static_paths_lower = set()
 
-        if file_name:
-            file_path = os.path.join(repo_path, file_name)
+        for file in self.MAINTAINER_FILES:
+            file_path = os.path.join(repo_path, file)
             if await aiofiles.os.path.isfile(file_path):
+                try:
+                    async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
+                        content = await f.read()
+                    candidates_static.append((file, content))
+                    static_paths_lower.add(file.lower())
+                    self.logger.info(f"Static match found: {file}")
+                except Exception as e:
+                    self.logger.warning(f"Failed to read static match {file}: {repr(e)}")
+
+        dynamic_paths = await self._ripgrep_search(repo_path)
+
+        scored_dynamic = []
+        for candidate_path in dynamic_paths:
+            if candidate_path.lower() in static_paths_lower:
+                continue
+
+            file_path = os.path.join(repo_path, candidate_path)
+            try:
                 async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
                     content = await f.read()
+            except Exception as e:
+                self.logger.warning(f"Failed to read dynamic match {candidate_path}: {repr(e)}")
+                continue
 
-                if file_name.lower() == "readme.md" and "maintainer" not in content.lower():
-                    self.logger.info(
-                        f"AI suggested {file_name}, but it has no maintainer-related content. Skipping."
-                    )
-                    return None, None, ai_cost
+            content_lower = content.lower()
+            # Calculate score based on keywords matched in the content
+            score = sum(1 for kw in self.CONTENT_VALIDATION_KEYWORDS if kw in content_lower)
+            if score > 0:
+                scored_dynamic.append((candidate_path, content, score))
+                self.logger.info(
+                    f"Dynamic match validated: {candidate_path} (keyword score: {score})"
+                )
 
-                self.logger.info(f"\nMaintainer file found: {file_name}")
-                return file_name, base64.b64encode(content.encode()).decode(), ai_cost
+        # Sort by score DESC
+        scored_dynamic.sort(key=lambda c: c[2], reverse=True)
 
-        return None, None, ai_cost
+        result = candidates_static + [(path, content) for path, content, _ in scored_dynamic]
+        self.logger.info(
+            f"Found {len(candidates_static)} static and {len(scored_dynamic)} dynamic candidates"
+        )
+        return result
 
     async def analyze_and_build_result(self, filename: str, content: str) -> MaintainerResult:
         """
@@ -452,12 +566,11 @@ async def try_saved_maintainer_file(
     async def extract_maintainers(
         self,
         repo_path: str,
-        owner: str,
-        repo: str,
         saved_maintainer_file: str | None = None,
     ):
         total_cost = 0
 
+        # Step 1: Try the previously saved maintainer file
         if saved_maintainer_file:
             self.logger.info(f"Trying saved maintainer file: {saved_maintainer_file}")
             result, cost = await self.try_saved_maintainer_file(repo_path, saved_maintainer_file)
@@ -467,22 +580,53 @@ async def extract_maintainers(
                 return result
             self.logger.info("Falling back to maintainer file detection")
 
-        self.logger.info("Looking for maintainer file...")
-        maintainer_file, file_content, cost = await self.find_maintainer_file(
-            repo_path, owner, repo
-        )
-        total_cost += cost
+        # Step 2: Find candidates via static list + ripgrep dynamic search
+        candidates = await self.find_candidate_files(repo_path)
+
+        # Step 3: Try AI analysis on candidates, stop on first success
+        if candidates:
+            attempts = min(len(candidates), self.MAX_AI_ANALYSIS_ATTEMPTS)
+            for filename, content in candidates[:attempts]:
+                try:
+                    result = await self.analyze_and_build_result(filename, content)
+                    total_cost += result.total_cost
+                    result.total_cost = total_cost
+                    return result
+                except MaintanerAnalysisError as e:
+                    total_cost += e.ai_cost
+                    self.logger.warning(f"AI analysis failed for '{filename}': {e.error_message}")
+                except Exception as e:
+                    self.logger.warning(f"Unexpected error analyzing '{filename}': {repr(e)}")
+
+            self.logger.warning(
+                f"AI analysis failed for all {attempts} candidate(s), trying AI file detection"
+            )
+        else:
+            self.logger.warning("No candidate files found via search, trying AI file detection")
 
-        if not maintainer_file or not file_content:
-            self.logger.error("No maintainer file found")
-            raise MaintainerFileNotFoundError(ai_cost=total_cost)
+        # Step 4: AI file detection as last resort
+        file_names = await self._list_repo_files(repo_path)
+        ai_file_name, ai_cost = await self.find_maintainer_file_with_ai(file_names)
+        total_cost += ai_cost
 
-        decoded_content = base64.b64decode(file_content).decode("utf-8")
-        result = await self.analyze_and_build_result(maintainer_file, decoded_content)
-        total_cost += result.total_cost
+        if ai_file_name:
+            file_path = os.path.join(repo_path, ai_file_name)
+            if await aiofiles.os.path.isfile(file_path):
+                try:
+                    async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
+                        content = await f.read()
+                    result = await self.analyze_and_build_result(ai_file_name, content)
+                    total_cost += result.total_cost
+                    result.total_cost = total_cost
+                    return result
+                except MaintanerAnalysisError as e:
+                    total_cost += e.ai_cost
+                    self.logger.warning(
+                        f"AI-suggested file '{ai_file_name}' analysis failed: {e.error_message}"
+                    )
 
-        result.total_cost = total_cost
-        return result
+        self.logger.error("No maintainer file found")
+        raise MaintainerFileNotFoundError(ai_cost=total_cost)
 
     async def check_if_interval_elapsed(self, repository: Repository) -> tuple[bool, float]:
         """
@@ -560,8 +704,6 @@ async def process_maintainers(
         maintainers_skipped = 0
 
         try:
-            owner, repo_name = parse_repo_url(batch_info.remote)
-
             has_interval_elapsed, remaining_hours = await self.check_if_interval_elapsed(
                 repository
             )
@@ -573,8 +715,6 @@ async def process_maintainers(
             self.logger.info(f"Starting maintainers processing for repo: {batch_info.remote}")
             maintainers = await self.extract_maintainers(
                 batch_info.repo_path,
-                owner,
-                repo_name,
                 saved_maintainer_file=repository.maintainer_file,
             )
             latest_maintainer_file = maintainers.maintainer_file

From 67ace7981f92f8ac5c9bf893ed2f953dd3aaeda0 Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Tue, 10 Mar 2026 15:39:09 +0000
Subject: [PATCH 04/16] feat: track analyzed maintainer files in metrics

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../src/crowdgit/models/maintainer_info.py    |  2 ++
 .../services/maintainer/maintainer_service.py | 25 ++++++++++++++-----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/models/maintainer_info.py b/services/apps/git_integration/src/crowdgit/models/maintainer_info.py
index 5a420567ae..6914059a2b 100644
--- a/services/apps/git_integration/src/crowdgit/models/maintainer_info.py
+++ b/services/apps/git_integration/src/crowdgit/models/maintainer_info.py
@@ -34,3 +34,5 @@ class MaintainerResult(BaseModel):
     maintainer_file: str | None = None
     maintainer_info: list[MaintainerInfoItem] | None = None
     total_cost: float = 0
+    candidate_files: list[str] = []
+    ai_suggested_file: str | None = None
diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index da0f995950..827ad047c0 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -569,6 +569,14 @@ async def extract_maintainers(
         saved_maintainer_file: str | None = None,
     ):
         total_cost = 0
+        candidate_files: list[str] = []
+        ai_suggested_file: str | None = None
+
+        def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
+            result.total_cost = total_cost
+            result.candidate_files = candidate_files
+            result.ai_suggested_file = ai_suggested_file
+            return result
 
         # Step 1: Try the previously saved maintainer file
         if saved_maintainer_file:
@@ -576,12 +584,12 @@ async def extract_maintainers(
             result, cost = await self.try_saved_maintainer_file(repo_path, saved_maintainer_file)
             total_cost += cost
             if result:
-                result.total_cost = total_cost
-                return result
+                return _attach_metadata(result)
             self.logger.info("Falling back to maintainer file detection")
 
         # Step 2: Find candidates via static list + ripgrep dynamic search
         candidates = await self.find_candidate_files(repo_path)
+        candidate_files = [path for path, _ in candidates]
 
         # Step 3: Try AI analysis on candidates, stop on first success
         if candidates:
@@ -590,8 +598,7 @@ async def extract_maintainers(
                 try:
                     result = await self.analyze_and_build_result(filename, content)
                     total_cost += result.total_cost
-                    result.total_cost = total_cost
-                    return result
+                    return _attach_metadata(result)
                 except MaintanerAnalysisError as e:
                     total_cost += e.ai_cost
                     self.logger.warning(f"AI analysis failed for '{filename}': {e.error_message}")
@@ -607,6 +614,7 @@ async def extract_maintainers(
         # Step 4: AI file detection as last resort
         file_names = await self._list_repo_files(repo_path)
         ai_file_name, ai_cost = await self.find_maintainer_file_with_ai(file_names)
+        ai_suggested_file = ai_file_name
         total_cost += ai_cost
 
         if ai_file_name:
@@ -617,8 +625,7 @@ async def extract_maintainers(
                         content = await f.read()
                     result = await self.analyze_and_build_result(ai_file_name, content)
                     total_cost += result.total_cost
-                    result.total_cost = total_cost
-                    return result
+                    return _attach_metadata(result)
                 except MaintanerAnalysisError as e:
                     total_cost += e.ai_cost
                     self.logger.warning(
@@ -702,6 +709,8 @@ async def process_maintainers(
         ai_cost = 0.0
         maintainers_found = 0
         maintainers_skipped = 0
+        candidate_files: list[str] = []
+        ai_suggested_file: str | None = None
 
         try:
             has_interval_elapsed, remaining_hours = await self.check_if_interval_elapsed(
@@ -720,6 +729,8 @@ async def process_maintainers(
             latest_maintainer_file = maintainers.maintainer_file
             ai_cost = maintainers.total_cost
             maintainers_found = len(maintainers.maintainer_info)
+            candidate_files = maintainers.candidate_files
+            ai_suggested_file = maintainers.ai_suggested_file
 
             if repository.parent_repo:
                 filtered_maintainers = await self.exclude_parent_repo_maintainers(
@@ -774,6 +785,8 @@ async def process_maintainers(
                     "ai_cost": ai_cost,
                     "maintainers_found": maintainers_found,
                     "maintainers_skipped": maintainers_skipped,
+                    "candidate_files": candidate_files,
+                    "ai_suggested_file": ai_suggested_file,
                 },
             )
             await save_service_execution(service_execution)

From 9018e808053d218abf778d8d068aec632757cbda Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Wed, 11 Mar 2026 13:32:20 +0000
Subject: [PATCH 05/16] feat: change candidate file detection to be narrower

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../src/crowdgit/models/maintainer_info.py    |   2 +-
 .../services/maintainer/maintainer_service.py | 272 ++++++++++--------
 2 files changed, 146 insertions(+), 128 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/models/maintainer_info.py b/services/apps/git_integration/src/crowdgit/models/maintainer_info.py
index 6914059a2b..1752999e54 100644
--- a/services/apps/git_integration/src/crowdgit/models/maintainer_info.py
+++ b/services/apps/git_integration/src/crowdgit/models/maintainer_info.py
@@ -34,5 +34,5 @@ class MaintainerResult(BaseModel):
     maintainer_file: str | None = None
     maintainer_info: list[MaintainerInfoItem] | None = None
     total_cost: float = 0
-    candidate_files: list[str] = []
+    candidate_files: list[tuple[str, int]] = []
     ai_suggested_file: str | None = None
diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index 827ad047c0..88d5e5ec57 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -45,28 +45,51 @@ class MaintainerService(BaseService):
     """Service for processing maintainer data"""
 
     MAX_CHUNK_SIZE = 5000
-    MAX_CONCURRENT_CHUNKS = 3  # Maximum concurrent chunk processing
-    MAX_AI_ANALYSIS_ATTEMPTS = 3
-
-    # List of common maintainer file names
-    MAINTAINER_FILES = [
-        "MAINTAINERS",
-        "MAINTAINERS.md",
-        "MAINTAINER.md",
-        "CODEOWNERS",
-        "CODEOWNERS.md",
-        "CONTRIBUTORS",
-        "CONTRIBUTORS.md",
-        "OWNERS",
-        "OWNERS.md",
-        "AUTHORS",
-        "AUTHORS.md",
-        "docs/MAINTAINERS.md",
-        ".github/MAINTAINERS.md",
-        ".github/CONTRIBUTORS.md",
-        ".github/CODEOWNERS",
-        "GOVERNANCE.md",
-    ]
+    MAX_CONCURRENT_CHUNKS = 3
+
+    # Full paths that get the highest score bonus when matched exactly
+    KNOWN_PATHS = {
+        "maintainers",
+        "maintainers.md",
+        "maintainer.md",
+        "codeowners",
+        "codeowners.md",
+        "contributors",
+        "contributors.md",
+        "owners",
+        "owners.md",
+        "authors",
+        "authors.md",
+        "governance.md",
+        "docs/maintainers.md",
+        ".github/maintainers.md",
+        ".github/contributors.md",
+        ".github/codeowners",
+    }
+
+    # Governance stems (basename without extension, lowercased) for filename search
+    GOVERNANCE_STEMS = {
+        "maintainers",
+        "maintainer",
+        "codeowners",
+        "codeowner",
+        "contributors",
+        "contributor",
+        "owners",
+        "owners_aliases",
+        "authors",
+        "committers",
+        "commiters",
+        "reviewers",
+        "approvers",
+        "administrators",
+        "stewards",
+        "credits",
+        "governance",
+        "core_team",
+        "code_owners",
+        "emeritus",
+    }
 
     VALID_EXTENSIONS = {
         "",
@@ -79,26 +102,31 @@ class MaintainerService(BaseService):
         ".toml",
         ".adoc",
         ".csv",
+        ".rdoc",
     }
 
-    CONTENT_VALIDATION_KEYWORDS = [
+    SCORING_KEYWORDS = [
         "maintainer",
         "codeowner",
         "owner",
         "contributor",
-        "author",
-        "reviewer",
         "governance",
-        "lead",
-        "approver",
-        "committer",
-        "credit",
-        "administrator",
         "steward",
         "emeritus",
+        "approver",
+        "reviewer",
     ]
 
-    EXCLUDED_FILENAMES = {"contributing.md", "contributing"}
+    EXCLUDED_FILENAMES = {
+        "contributing.md",
+        "contributing",
+        "code_of_conduct.md",
+        "code-of-conduct.md",
+    }
+
+    FULL_PATH_SCORE = 100
+    STEM_MATCH_SCORE = 50
+    PARTIAL_STEM_SCORE = 25
 
     def make_role(self, title: str):
         title = title.lower()
@@ -385,7 +413,7 @@ def get_maintainer_file_prompt(self, example_files: list[str], file_names: list[
 
     async def find_maintainer_file_with_ai(self, file_names):
         self.logger.info("Using AI to find maintainer files...")
-        prompt = self.get_maintainer_file_prompt(self.MAINTAINER_FILES, file_names)
+        prompt = self.get_maintainer_file_prompt(sorted(self.KNOWN_PATHS), file_names)
         result = await invoke_bedrock(prompt, pydantic_model=MaintainerFile)
 
         if result.output.file_name is not None:
@@ -395,40 +423,39 @@ async def find_maintainer_file_with_ai(self, file_names):
             return None, result.cost
 
     async def _list_repo_files(self, repo_path: str) -> list[str]:
-        """List all files in the repo recursively, respecting .gitignore via rg."""
-        try:
-            output = await run_shell_command(
-                ["rg", "--files", "--hidden", "--glob", "!.git/", "."], cwd=repo_path
-            )
-            return [
-                line[2:] if line.startswith("./") else line
-                for line in output.strip().split("\n")
-                if line.strip()
-            ]
-        except Exception as e:
-            self.logger.warning(f"rg --files failed, falling back to os.walk: {repr(e)}")
-            results = []
-            for dirpath, dirnames, filenames in os.walk(repo_path):
-                dirnames[:] = [d for d in dirnames if d != ".git"]
-                for filename in filenames:
-                    full_path = os.path.join(dirpath, filename)
-                    results.append(os.path.relpath(full_path, repo_path))
-            return results
+        """List non-code files in the repo recursively, filtered by VALID_EXTENSIONS."""
+        glob_args = ["--glob", "!.git/"]
+        for ext in self.VALID_EXTENSIONS:
+            glob_args.extend(["--iglob", f"*{ext}"])
 
-    async def _ripgrep_search(self, repo_path: str) -> list[str]:
-        """Search for files containing maintainer-related keywords, filtered to valid extensions."""
-        pattern = "|".join(self.CONTENT_VALIDATION_KEYWORDS)
+        output = await run_shell_command(
+            ["rg", "--files", "--hidden", *glob_args, "."], cwd=repo_path
+        )
+        return [
+            line[2:] if line.startswith("./") else line
+            for line in output.strip().split("\n")
+            if line.strip()
+        ]
 
-        exclusion_globs = ["--glob", "!.git/"]
-        for name in self.EXCLUDED_FILENAMES:
-            exclusion_globs.extend(["--iglob", f"!{name}"])
+    async def _ripgrep_search(self, repo_path: str) -> list[str]:
+        """Search for files whose basename matches a governance stem, at any depth."""
+        glob_args = ["--glob", "!.git/"]
+        for stem in self.GOVERNANCE_STEMS:
+            glob_args.extend(
+                [
+                    "--iglob",
+                    f"*{stem}*",
+                    "--iglob",
+                    f"*{stem}*.*",
+                ]
+            )
 
         try:
             output = await run_shell_command(
-                ["rg", "-l", "-i", "--hidden", pattern, *exclusion_globs, "."], cwd=repo_path
+                ["rg", "--files", "--hidden", *glob_args, "."], cwd=repo_path
             )
         except CommandExecutionError:
-            self.logger.info("Ripgrep found no files containing maintainer keywords")
+            self.logger.info("Ripgrep found no governance files by filename")
             return []
         except Exception as e:
             self.logger.warning(f"Ripgrep search failed: {repr(e)}")
@@ -442,74 +469,64 @@ async def _ripgrep_search(self, repo_path: str) -> list[str]:
             if line.startswith("./"):
                 line = line[2:]
             basename = os.path.basename(line).lower()
+            if basename in self.EXCLUDED_FILENAMES:
+                continue
             ext = os.path.splitext(basename)[1]
             if ext not in self.VALID_EXTENSIONS:
-                self.logger.debug(f"Skipping '{line}': extension '{ext}' not in valid extensions")
-                continue
-            if ext == "" and not any(kw in basename for kw in self.CONTENT_VALIDATION_KEYWORDS):
-                self.logger.debug(
-                    f"Skipping extensionless file '{line}': "
-                    f"basename '{basename}' contains no governance keyword"
-                )
                 continue
             results.append(line)
 
-        self.logger.info(f"Ripgrep found {len(results)} candidate files after filtering")
+        self.logger.info(f"Ripgrep found {len(results)} governance files by filename")
         return results
 
-    async def find_candidate_files(self, repo_path: str) -> list[tuple[str, str]]:
+    def _score_filename(self, candidate_path: str) -> int:
+        """Score by how closely the filename matches known governance patterns."""
+        path = candidate_path.lower()
+        if path in self.KNOWN_PATHS:
+            return self.FULL_PATH_SCORE
+        stem = os.path.splitext(os.path.basename(path))[0].lstrip(".")
+        if stem in self.GOVERNANCE_STEMS:
+            return self.STEM_MATCH_SCORE
+        if any(known_stem in stem for known_stem in self.GOVERNANCE_STEMS):
+            return self.PARTIAL_STEM_SCORE
+        return 0
+
+    async def find_candidate_files(self, repo_path: str) -> list[tuple[str, str, int]]:
         """
-        Find all potential maintainer files using static list + dynamic ripgrep search.
-        Returns ordered list of (relative_path, content) tuples.
-        Static matches come first, then dynamic matches sorted by content keyword score.
+        Find governance files by filename, score them, and return all candidates sorted by score.
+        Scoring: full known-path match (100) > exact stem (50) > partial stem (25) + content keywords (+1 each).
         """
-        candidates_static = []
-        static_paths_lower = set()
-
-        for file in self.MAINTAINER_FILES:
-            file_path = os.path.join(repo_path, file)
-            if await aiofiles.os.path.isfile(file_path):
-                try:
-                    async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
-                        content = await f.read()
-                    candidates_static.append((file, content))
-                    static_paths_lower.add(file.lower())
-                    self.logger.info(f"Static match found: {file}")
-                except Exception as e:
-                    self.logger.warning(f"Failed to read static match {file}: {repr(e)}")
-
-        dynamic_paths = await self._ripgrep_search(repo_path)
-
-        scored_dynamic = []
-        for candidate_path in dynamic_paths:
-            if candidate_path.lower() in static_paths_lower:
-                continue
+        found_paths = await self._ripgrep_search(repo_path)
+        if not found_paths:
+            return []
 
+        scored = []
+        for candidate_path in found_paths:
             file_path = os.path.join(repo_path, candidate_path)
             try:
                 async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
                     content = await f.read()
             except Exception as e:
-                self.logger.warning(f"Failed to read dynamic match {candidate_path}: {repr(e)}")
+                self.logger.warning(f"Failed to read candidate {candidate_path}: {repr(e)}")
                 continue
 
-            content_lower = content.lower()
-            # Calculate score based on keywords matched in the content
-            score = sum(1 for kw in self.CONTENT_VALIDATION_KEYWORDS if kw in content_lower)
-            if score > 0:
-                scored_dynamic.append((candidate_path, content, score))
-                self.logger.info(
-                    f"Dynamic match validated: {candidate_path} (keyword score: {score})"
-                )
+            filename_score = self._score_filename(candidate_path)
+            content_score = sum(1 for kw in self.SCORING_KEYWORDS if kw in content.lower())
+            total = filename_score + content_score
 
-        # Sort by score DESC
-        scored_dynamic.sort(key=lambda c: c[2], reverse=True)
+            scored.append((candidate_path, content, total))
+            self.logger.info(
+                f"Candidate: {candidate_path} "
+                f"(filename: {filename_score}, content: {content_score}, total: {total})"
+            )
 
-        result = candidates_static + [(path, content) for path, content, _ in scored_dynamic]
-        self.logger.info(
-            f"Found {len(candidates_static)} static and {len(scored_dynamic)} dynamic candidates"
-        )
-        return result
+        scored.sort(key=lambda c: c[2], reverse=True)
+
+        if scored:
+            self.logger.info(f"Top candidate: {scored[0][0]} (from {len(scored)} total)")
+        else:
+            self.logger.info("No valid candidates after scoring")
+        return scored
 
     async def analyze_and_build_result(self, filename: str, content: str) -> MaintainerResult:
         """
@@ -569,7 +586,7 @@ async def extract_maintainers(
         saved_maintainer_file: str | None = None,
     ):
         total_cost = 0
-        candidate_files: list[str] = []
+        candidate_files: list[tuple[str, int]] = []
         ai_suggested_file: str | None = None
 
         def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
@@ -587,27 +604,24 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
                 return _attach_metadata(result)
             self.logger.info("Falling back to maintainer file detection")
 
-        # Step 2: Find candidates via static list + ripgrep dynamic search
+        # Step 2: Find top candidate via filename search + scoring
         candidates = await self.find_candidate_files(repo_path)
-        candidate_files = [path for path, _ in candidates]
+        candidate_files = [(path, score) for path, _, score in candidates]
 
-        # Step 3: Try AI analysis on candidates, stop on first success
+        # Step 3: Try AI analysis on top candidate
         if candidates:
-            attempts = min(len(candidates), self.MAX_AI_ANALYSIS_ATTEMPTS)
-            for filename, content in candidates[:attempts]:
-                try:
-                    result = await self.analyze_and_build_result(filename, content)
-                    total_cost += result.total_cost
-                    return _attach_metadata(result)
-                except MaintanerAnalysisError as e:
-                    total_cost += e.ai_cost
-                    self.logger.warning(f"AI analysis failed for '{filename}': {e.error_message}")
-                except Exception as e:
-                    self.logger.warning(f"Unexpected error analyzing '{filename}': {repr(e)}")
+            filename, content, _ = candidates[0]
+            try:
+                result = await self.analyze_and_build_result(filename, content)
+                total_cost += result.total_cost
+                return _attach_metadata(result)
+            except MaintanerAnalysisError as e:
+                total_cost += e.ai_cost
+                self.logger.warning(f"AI analysis failed for '{filename}': {e.error_message}")
+            except Exception as e:
+                self.logger.warning(f"Unexpected error analyzing '{filename}': {repr(e)}")
 
-            self.logger.warning(
-                f"AI analysis failed for all {attempts} candidate(s), trying AI file detection"
-            )
+            self.logger.warning("Top candidate failed, trying AI file detection")
         else:
             self.logger.warning("No candidate files found via search, trying AI file detection")
 
@@ -619,7 +633,11 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
 
         if ai_file_name:
             file_path = os.path.join(repo_path, ai_file_name)
-            if await aiofiles.os.path.isfile(file_path):
+            if not await aiofiles.os.path.isfile(file_path):
+                self.logger.warning(
+                    f"AI suggested '{ai_file_name}' but file does not exist on disk"
+                )
+            else:
                 try:
                     async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
                         content = await f.read()

From ae33af60a883bfec1f729207ce0e82691cf50dde Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Wed, 11 Mar 2026 13:37:33 +0000
Subject: [PATCH 06/16] fix: enable email fallback for identity lookup during
 maintainer update

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../services/maintainer/maintainer_service.py        | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index 88d5e5ec57..d52c99df86 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -188,14 +188,18 @@ async def compare_and_update_maintainers(
         for github_username, maintainer in new_maintainers_dict.items():
             role = maintainer.normalized_title
             original_role = self.make_role(maintainer.title)
-            if github_username == "unknown":
+            if github_username == "unknown" and maintainer.email in ("unknown", None):
                 self.logger.warning(
-                    f"Skipping unkown github_username with title {maintainer.title}"
+                    f"Skipping unknown github_username & email with title {maintainer.title}"
                 )
                 continue
             elif github_username not in current_maintainers_dict:
                 # New maintainer
-                identity_id = await find_github_identity(github_username)
+                identity_id = (
+                    await find_github_identity(github_username)
+                    if github_username != "unknown"
+                    else await find_maintainer_identity_by_email(maintainer.email)
+                )
                 self.logger.info(f"Found new maintainer {github_username} to be inserted")
                 if identity_id:
                     await upsert_maintainer(
@@ -205,7 +209,7 @@ async def compare_and_update_maintainers(
                         f"Successfully inserted new maintainer {github_username} with identity_id {identity_id}"
                     )
                 else:
-                    # will happend for new users if their identity isn't created yet but should fixed on the next iteration
+                    # will happen for new users if their identity isn't created yet but should be fixed on the next iteration
                     self.logger.warning(f"Identity not found for username: {github_username}")
             else:
                 # Existing maintainer

From 019f6df6c7a838e1b90e5aeee9559284ee6546e8 Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Wed, 11 Mar 2026 13:45:05 +0000
Subject: [PATCH 07/16] chore: avoid building ai prompt with full content if
 batching is required

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../src/crowdgit/services/maintainer/maintainer_service.py  | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index d52c99df86..b89923c21c 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -309,7 +309,6 @@ def get_extraction_prompt(self, filename: str, content_to_analyze: str) -> str:
         """
 
     async def analyze_file_content(self, maintainer_filename: str, content: str):
-        prompt = self.get_extraction_prompt(maintainer_filename, content)
         if len(content) > self.MAX_CHUNK_SIZE:
             self.logger.info(
                 "Maintainers file content exceeded max chunk size, splitting into chunks"
@@ -353,7 +352,10 @@ async def process_chunk(chunk_index: int, chunk: str):
                 aggregated_info.cost += chunk_info.cost
             maintainer_info = aggregated_info
         else:
-            maintainer_info = await invoke_bedrock(prompt, pydantic_model=MaintainerInfo)
+            maintainer_info = await invoke_bedrock(
+                self.get_extraction_prompt(maintainer_filename, content),
+                pydantic_model=MaintainerInfo,
+            )
         self.logger.info("Maintainers file content analyzed by AI")
         self.logger.info(f"Maintainers response: {maintainer_info}")
         if maintainer_info.output.info is not None:

From f284e8ed0df89f5386f65657060a18df04b8062f Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Wed, 11 Mar 2026 13:46:19 +0000
Subject: [PATCH 08/16] fix: remove duplicate rg pattern

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../crowdgit/services/maintainer/maintainer_service.py   | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index b89923c21c..93bdad5f32 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -447,14 +447,7 @@ async def _ripgrep_search(self, repo_path: str) -> list[str]:
         """Search for files whose basename matches a governance stem, at any depth."""
         glob_args = ["--glob", "!.git/"]
         for stem in self.GOVERNANCE_STEMS:
-            glob_args.extend(
-                [
-                    "--iglob",
-                    f"*{stem}*",
-                    "--iglob",
-                    f"*{stem}*.*",
-                ]
-            )
+            glob_args.extend(["--iglob", f"*{stem}*"])
 
         try:
             output = await run_shell_command(

From 969944ee197f042dbaccfc0f7798a2854474bb2b Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Wed, 11 Mar 2026 13:54:24 +0000
Subject: [PATCH 09/16] chore: add extra validation for readme files to have
 maintainer keyword in content

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../src/crowdgit/services/maintainer/maintainer_service.py   | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index 93bdad5f32..5f772cc7df 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -533,6 +533,11 @@ async def analyze_and_build_result(self, filename: str, content: str) -> Maintai
         Raises MaintanerAnalysisError if no maintainers are found.
         """
         self.logger.info(f"Analyzing maintainer file: {filename}")
+        if "readme" in filename.lower() and "maintainer" not in content.lower():
+            self.logger.warning(
+                f"Skipping README file '{filename}': no 'maintainer' keyword found in content"
+            )
+            raise MaintanerAnalysisError(error_code=ErrorCode.NO_MAINTAINER_FOUND)
         result = await self.analyze_file_content(filename, content)
 
         if not result.output.info:

From 77407152ca8b0f66a5839eeb8f126ca9be72b8df Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Wed, 11 Mar 2026 14:32:41 +0000
Subject: [PATCH 10/16] feat: improve ai fallback detection by passing scored
 candidates and improve prompt

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../services/maintainer/maintainer_service.py | 56 +++++++++++++++----
 1 file changed, 44 insertions(+), 12 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index 5f772cc7df..37c45ca291 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -46,6 +46,7 @@ class MaintainerService(BaseService):
 
     MAX_CHUNK_SIZE = 5000
     MAX_CONCURRENT_CHUNKS = 3
+    MAX_AI_FILE_LIST_SIZE = 300
 
     # Full paths that get the highest score bonus when matched exactly
     KNOWN_PATHS = {
@@ -376,33 +377,40 @@ async def process_chunk(chunk_index: int, chunk: str):
                 ai_cost=maintainer_info.cost,
             )
 
-    def get_maintainer_file_prompt(self, example_files: list[str], file_names: list[str]) -> str:
+    def get_maintainer_file_prompt(
+        self, example_files: list[str], candidates: list[tuple[str, int]]
+    ) -> str:
         """
         Generates the prompt for the LLM to identify a maintainer file from a list.
+        candidates: list of (filename, score) where score reflects name-match strength.
         """
         example_files_str = "\n".join(f"- {name}" for name in example_files)
-        file_names_str = "\n".join(f"- {name}" for name in file_names)
+        candidates_str = "\n".join(f"- {name}  [score={score}]" for name, score in candidates)
 
         return f"""
-        You are an expert AI assistant specializing in identifying repository governance files. Your task is to find a maintainer file from a given list of filenames.
+        You are an expert AI assistant specializing in identifying repository governance files. Your task is to find the single best maintainer file from a given list of candidates.
 
         <instructions>
-        1.  **Analyze the Input**: Carefully review the list of filenames provided in the `<file_list>` tag.
-        2.  **Identify a Maintainer File**: Compare each filename against the characteristics of a maintainer file. These files typically define project ownership, governance, or code owners. Use the `<example_maintainer_files>` as a guide.
-        3.  **Apply Rules**: Follow all constraints listed in the `<rules>` section, especially the exclusion rule.
-        4.  **Select the First Match**: Scan the list and select the *first* filename that you identify as a maintainer file. You only need to find one. Once a match is found, stop searching.
+        1.  **Analyze the Input**: Carefully review the list of candidates in the `<file_list>` tag. Each entry shows the file path and a pre-computed name-match score.
+        2.  **Identify the Best Maintainer File**: Compare each candidate against the characteristics of a maintainer file. These files typically define project ownership, governance, or code owners. Use the `<example_maintainer_files>` as a guide.
+        3.  **Use Signals to Rank**: When multiple candidates qualify, prefer:
+            - Higher **score** — stronger filename match against known governance patterns.
+            - Fewer path separators (`/`) in the path — files closer to the repo root apply to the whole project; deeply nested files are usually component-specific.
+            - When score and nesting conflict, prefer the file most likely to be the repo-wide governance file.
+        4.  **Apply Rules**: Follow all constraints listed in the `<rules>` section.
         5.  **Format the Output**: Return your answer as a single JSON object according to the `<output_format>` specification, and nothing else.
         </instructions>
 
         <rules>
         - **Definition**: A maintainer file's name usually contains keywords like `MAINTAINERS`, `CODEOWNERS`, or `OWNERS`.
         - **Exclusion**: The filename `CONTRIBUTING.md` must ALWAYS be ignored and never selected, even if it's the only file that seems relevant.
+        - **Third-party exclusion**: Do NOT select files that are inside directories associated with vendored dependencies, third-party libraries, or packages consumed by the project (e.g. paths containing `vendor/`, `node_modules/`, `third_party/`, `external/`, `.cache/`, `dist/`, `site-packages/`). These files belong to external projects, not this repository's own governance.
         - **No Match**: If no file in the list matches the criteria after checking all of them, you must return the 'not_found' error.
         - **Empty Input**: If the `<file_list>` is empty or contains no filenames, you must return the 'not_found' error.
         </rules>
 
         <output_format>
-        - **If a maintainer file is found**: Return a JSON object in the format `{{"file_name": "<the_first_found_file_name>"}}`.
+        - **If a maintainer file is found**: Return a JSON object in the format `{{"file_name": "<the_best_matched_file_name>"}}`.
         - **If no maintainer file is found**: Return a JSON object in the format `{{"error": "not_found"}}`.
         </output_format>
 
@@ -411,15 +419,18 @@ def get_maintainer_file_prompt(self, example_files: list[str], file_names: list[
         </example_maintainer_files>
 
         <file_list>
-        {file_names_str}
+        {candidates_str}
         </file_list>
 
         Return only the final JSON object.
         """
 
-    async def find_maintainer_file_with_ai(self, file_names):
+    async def find_maintainer_file_with_ai(
+        self, candidates: list[tuple[str, int]]
+    ) -> tuple[str | None, float]:
+        """Ask AI to select the best maintainer file from scored candidates."""
         self.logger.info("Using AI to find maintainer files...")
-        prompt = self.get_maintainer_file_prompt(sorted(self.KNOWN_PATHS), file_names)
+        prompt = self.get_maintainer_file_prompt(sorted(self.KNOWN_PATHS), candidates)
         result = await invoke_bedrock(prompt, pydantic_model=MaintainerFile)
 
         if result.output.file_name is not None:
@@ -613,6 +624,7 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
         candidate_files = [(path, score) for path, _, score in candidates]
 
         # Step 3: Try AI analysis on top candidate
+        failed_candidate: str | None = None
         if candidates:
             filename, content, _ = candidates[0]
             try:
@@ -625,13 +637,33 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
             except Exception as e:
                 self.logger.warning(f"Unexpected error analyzing '{filename}': {repr(e)}")
 
+            failed_candidate = filename
             self.logger.warning("Top candidate failed, trying AI file detection")
         else:
             self.logger.warning("No candidate files found via search, trying AI file detection")
 
         # Step 4: AI file detection as last resort
         file_names = await self._list_repo_files(repo_path)
-        ai_file_name, ai_cost = await self.find_maintainer_file_with_ai(file_names)
+        # Pre-filter to governance-scored files to keep the AI prompt within model limits.
+        # Fall back to a hard-capped slice of the full list if nothing scores.
+        # Exclude the already-failed top candidate to avoid re-suggesting it.
+        scored_tuples = [
+            (f, self._score_filename(f))
+            for f in file_names
+            if self._score_filename(f) > 0 and f != failed_candidate
+        ]
+        ai_input_files: list[tuple[str, int]] = (
+            scored_tuples
+            if scored_tuples
+            else [
+                (f, 0) for f in file_names[: self.MAX_AI_FILE_LIST_SIZE] if f != failed_candidate
+            ]
+        )
+        self.logger.info(
+            f"Passing {len(ai_input_files)} files to AI for maintainer file detection "
+            f"(total repo files: {len(file_names)})"
+        )
+        ai_file_name, ai_cost = await self.find_maintainer_file_with_ai(ai_input_files)
         ai_suggested_file = ai_file_name
         total_cost += ai_cost
 

From 684c85e1180bdab3f151236a60646c94238d7239 Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Wed, 11 Mar 2026 14:34:35 +0000
Subject: [PATCH 11/16] chore: limit candidate_files saved in db to 100

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../src/crowdgit/services/maintainer/maintainer_service.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index 37c45ca291..82036590e3 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -621,7 +621,7 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
 
         # Step 2: Find top candidate via filename search + scoring
         candidates = await self.find_candidate_files(repo_path)
-        candidate_files = [(path, score) for path, _, score in candidates]
+        candidate_files = [(path, score) for path, _, score in candidates][:100]
 
         # Step 3: Try AI analysis on top candidate
         failed_candidate: str | None = None

From d908df5d176541bcab9b14bfdca60473189fdd47 Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Wed, 11 Mar 2026 17:17:56 +0000
Subject: [PATCH 12/16] chore: add extra filename & stems

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../src/crowdgit/services/maintainer/maintainer_service.py      | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index 82036590e3..a5f4d2f1f9 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -66,6 +66,7 @@ class MaintainerService(BaseService):
         ".github/maintainers.md",
         ".github/contributors.md",
         ".github/codeowners",
+        "SECURITY-INSIGHTS.md",
     }
 
     # Governance stems (basename without extension, lowercased) for filename search
@@ -90,6 +91,7 @@ class MaintainerService(BaseService):
         "core_team",
         "code_owners",
         "emeritus",
+        "workgroup",
     }
 
     VALID_EXTENSIONS = {

From fa2dd367cacb099029914758b518201441c19e11 Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Fri, 13 Mar 2026 17:01:54 +0000
Subject: [PATCH 13/16] feat: analyze all root files combined and fallback to
 top one in subdirectories

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../services/maintainer/maintainer_service.py | 125 +++++++++++++-----
 1 file changed, 93 insertions(+), 32 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index a5f4d2f1f9..c3590b4b54 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -456,15 +456,21 @@ async def _list_repo_files(self, repo_path: str) -> list[str]:
             if line.strip()
         ]
 
-    async def _ripgrep_search(self, repo_path: str) -> list[str]:
-        """Search for files whose basename matches a governance stem, at any depth."""
+    async def _ripgrep_search(self, repo_path: str, max_depth: int | None = None) -> list[str]:
+        """Search for files whose basename matches a governance stem.
+
+        Args:
+            max_depth: If set, passed as --max-depth to ripgrep (1 = repo root files only).
+        """
         glob_args = ["--glob", "!.git/"]
         for stem in self.GOVERNANCE_STEMS:
             glob_args.extend(["--iglob", f"*{stem}*"])
 
+        depth_args = ["--max-depth", str(max_depth)] if max_depth is not None else []
+
         try:
             output = await run_shell_command(
-                ["rg", "--files", "--hidden", *glob_args, "."], cwd=repo_path
+                ["rg", "--files", "--hidden", *depth_args, *glob_args, "."], cwd=repo_path
             )
         except CommandExecutionError:
             self.logger.info("Ripgrep found no governance files by filename")
@@ -503,17 +509,26 @@ def _score_filename(self, candidate_path: str) -> int:
             return self.PARTIAL_STEM_SCORE
         return 0
 
-    async def find_candidate_files(self, repo_path: str) -> list[tuple[str, str, int]]:
+    async def find_candidate_files(
+        self, repo_path: str
+    ) -> tuple[list[tuple[str, str, int]], list[tuple[str, str, int]]]:
         """
-        Find governance files by filename, score them, and return all candidates sorted by score.
+        Find governance files by filename, score them, and return (root_candidates, subdir_candidates).
+
+        Root candidates are files directly in the repo root (ripgrep --max-depth 1).
+        Subdir candidates are files in subdirectories.
+        Both lists are sorted by score descending.
         Scoring: full known-path match (100) > exact stem (50) > partial stem (25) + content keywords (+1 each).
         """
-        found_paths = await self._ripgrep_search(repo_path)
-        if not found_paths:
-            return []
+        root_paths = set(await self._ripgrep_search(repo_path, max_depth=1))
+        all_paths = await self._ripgrep_search(repo_path)
+        if not all_paths:
+            return [], []
+
+        root_scored: list[tuple[str, str, int]] = []
+        subdir_scored: list[tuple[str, str, int]] = []
 
-        scored = []
-        for candidate_path in found_paths:
+        for candidate_path in all_paths:
             file_path = os.path.join(repo_path, candidate_path)
             try:
                 async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
@@ -526,19 +541,24 @@ async def find_candidate_files(self, repo_path: str) -> list[tuple[str, str, int
             content_score = sum(1 for kw in self.SCORING_KEYWORDS if kw in content.lower())
             total = filename_score + content_score
 
-            scored.append((candidate_path, content, total))
+            entry = (candidate_path, content, total)
+            if candidate_path in root_paths:
+                root_scored.append(entry)
+            else:
+                subdir_scored.append(entry)
+
             self.logger.info(
                 f"Candidate: {candidate_path} "
                 f"(filename: {filename_score}, content: {content_score}, total: {total})"
             )
 
-        scored.sort(key=lambda c: c[2], reverse=True)
+        root_scored.sort(key=lambda c: c[2], reverse=True)
+        subdir_scored.sort(key=lambda c: c[2], reverse=True)
 
-        if scored:
-            self.logger.info(f"Top candidate: {scored[0][0]} (from {len(scored)} total)")
-        else:
-            self.logger.info("No valid candidates after scoring")
-        return scored
+        self.logger.info(
+            f"Found {len(root_scored)} root candidate(s) and {len(subdir_scored)} subdirectory candidate(s)"
+        )
+        return root_scored, subdir_scored
 
     async def analyze_and_build_result(self, filename: str, content: str) -> MaintainerResult:
         """
@@ -621,14 +641,56 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
                 return _attach_metadata(result)
             self.logger.info("Falling back to maintainer file detection")
 
-        # Step 2: Find top candidate via filename search + scoring
-        candidates = await self.find_candidate_files(repo_path)
-        candidate_files = [(path, score) for path, _, score in candidates][:100]
+        # Step 2: Find candidates via filename search + scoring, split by depth
+        root_candidates, subdir_candidates = await self.find_candidate_files(repo_path)
+        all_candidates = root_candidates + subdir_candidates
+        candidate_files = [(path, score) for path, _, score in all_candidates][:100]
+
+        # Step 3: Try root-level files first (in score order), then top subdirectory file
+        failed_candidates: set[str] = set()
+
+        if not all_candidates:
+            self.logger.warning("No candidate files found via search, trying AI file detection")
+
+        combined_info: list = []
+        best_file: str | None = None
+        best_file_count: int = 0
+
+        for filename, content, _ in root_candidates:
+            try:
+                result = await self.analyze_and_build_result(filename, content)
+                total_cost += result.total_cost
+                file_info = result.maintainer_info or []
+                combined_info.extend(file_info)
+                if len(file_info) > best_file_count:
+                    best_file = filename
+                    best_file_count = len(file_info)
+            except MaintanerAnalysisError as e:
+                total_cost += e.ai_cost
+                self.logger.warning(
+                    f"AI analysis failed for root file '{filename}': {e.error_message}"
+                )
+            except Exception as e:
+                self.logger.warning(
+                    f"Unexpected error analyzing root file '{filename}': {repr(e)}"
+                )
+            failed_candidates.add(filename)
+
+        if combined_info:
+            return _attach_metadata(
+                MaintainerResult(
+                    maintainer_file=best_file,
+                    maintainer_info=combined_info,
+                )
+            )
 
-        # Step 3: Try AI analysis on top candidate
-        failed_candidate: str | None = None
-        if candidates:
-            filename, content, _ = candidates[0]
+        if root_candidates and subdir_candidates:
+            self.logger.warning("All root candidates failed, trying top subdirectory candidate")
+        elif root_candidates:
+            self.logger.warning("All root candidates failed, trying AI file detection")
+
+        if subdir_candidates:
+            filename, content, _ = subdir_candidates[0]
             try:
                 result = await self.analyze_and_build_result(filename, content)
                 total_cost += result.total_cost
@@ -638,27 +700,26 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
                 self.logger.warning(f"AI analysis failed for '{filename}': {e.error_message}")
             except Exception as e:
                 self.logger.warning(f"Unexpected error analyzing '{filename}': {repr(e)}")
-
-            failed_candidate = filename
-            self.logger.warning("Top candidate failed, trying AI file detection")
-        else:
-            self.logger.warning("No candidate files found via search, trying AI file detection")
+            failed_candidates.add(filename)
+            self.logger.warning("Top subdirectory candidate failed, trying AI file detection")
 
         # Step 4: AI file detection as last resort
         file_names = await self._list_repo_files(repo_path)
         # Pre-filter to governance-scored files to keep the AI prompt within model limits.
         # Fall back to a hard-capped slice of the full list if nothing scores.
-        # Exclude the already-failed top candidate to avoid re-suggesting it.
+        # Exclude all already-failed candidates to avoid re-suggesting them.
         scored_tuples = [
             (f, self._score_filename(f))
             for f in file_names
-            if self._score_filename(f) > 0 and f != failed_candidate
+            if self._score_filename(f) > 0 and f not in failed_candidates
         ]
         ai_input_files: list[tuple[str, int]] = (
             scored_tuples
             if scored_tuples
             else [
-                (f, 0) for f in file_names[: self.MAX_AI_FILE_LIST_SIZE] if f != failed_candidate
+                (f, 0)
+                for f in file_names[: self.MAX_AI_FILE_LIST_SIZE]
+                if f not in failed_candidates
             ]
         )
         self.logger.info(

From 2cd6f467decb2afb3935e4b29befac730562cf5c Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Thu, 26 Mar 2026 11:48:52 +0100
Subject: [PATCH 14/16] fix: improve extensionless file detection & debug logs

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../services/maintainer/maintainer_service.py | 41 +++++++++++++++----
 1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index c3590b4b54..79cd216d9d 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -66,7 +66,7 @@ class MaintainerService(BaseService):
         ".github/maintainers.md",
         ".github/contributors.md",
         ".github/codeowners",
-        "SECURITY-INSIGHTS.md",
+        "security-insights.md",
     }
 
     # Governance stems (basename without extension, lowercased) for filename search
@@ -445,7 +445,8 @@ async def _list_repo_files(self, repo_path: str) -> list[str]:
         """List non-code files in the repo recursively, filtered by VALID_EXTENSIONS."""
         glob_args = ["--glob", "!.git/"]
         for ext in self.VALID_EXTENSIONS:
-            glob_args.extend(["--iglob", f"*{ext}"])
+            if ext:
+                glob_args.extend(["--iglob", f"*{ext}"])
 
         output = await run_shell_command(
             ["rg", "--files", "--hidden", *glob_args, "."], cwd=repo_path
@@ -453,7 +454,7 @@ async def _list_repo_files(self, repo_path: str) -> list[str]:
         return [
             line[2:] if line.startswith("./") else line
             for line in output.strip().split("\n")
-            if line.strip()
+            if line.strip() and os.path.splitext(line)[1] in self.VALID_EXTENSIONS
         ]
 
     async def _ripgrep_search(self, repo_path: str, max_depth: int | None = None) -> list[str]:
@@ -475,6 +476,16 @@ async def _ripgrep_search(self, repo_path: str, max_depth: int | None = None) ->
         except CommandExecutionError:
             self.logger.info("Ripgrep found no governance files by filename")
             return []
+        except FileNotFoundError as e:
+            if not os.path.isdir(repo_path):
+                self.logger.warning(
+                    f"Ripgrep search failed: repo_path does not exist: '{repo_path}'"
+                )
+            else:
+                self.logger.warning(
+                    f"Ripgrep search failed: 'rg' binary not found in PATH. Install ripgrep. ({repr(e)})"
+                )
+            return []
         except Exception as e:
             self.logger.warning(f"Ripgrep search failed: {repr(e)}")
             return []
@@ -488,9 +499,11 @@ async def _ripgrep_search(self, repo_path: str, max_depth: int | None = None) ->
                 line = line[2:]
             basename = os.path.basename(line).lower()
             if basename in self.EXCLUDED_FILENAMES:
+                self.logger.debug(f"Excluding '{line}': basename in EXCLUDED_FILENAMES")
                 continue
             ext = os.path.splitext(basename)[1]
             if ext not in self.VALID_EXTENSIONS:
+                self.logger.debug(f"Excluding '{line}': extension '{ext}' not in VALID_EXTENSIONS")
                 continue
             results.append(line)
 
@@ -547,9 +560,9 @@ async def find_candidate_files(
             else:
                 subdir_scored.append(entry)
 
-            self.logger.info(
+            self.logger.debug(
                 f"Candidate: {candidate_path} "
-                f"(filename: {filename_score}, content: {content_score}, total: {total})"
+                f"(filename_score={filename_score}, content_score={content_score}, total={total})"
             )
 
         root_scored.sort(key=lambda c: c[2], reverse=True)
@@ -591,6 +604,7 @@ async def try_saved_maintainer_file(
         """
         cost = 0.0
         file_path = os.path.join(repo_path, saved_maintainer_file)
+        self.logger.debug(f"Checking saved maintainer file on disk: '{file_path}'")
 
         if not await aiofiles.os.path.isfile(file_path):
             self.logger.warning(
@@ -598,6 +612,7 @@ async def try_saved_maintainer_file(
             )
             return None, cost
 
+        self.logger.debug(f"Saved maintainer file exists, reading content: '{saved_maintainer_file}'")
         try:
             async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
                 content = await f.read()
@@ -645,6 +660,12 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
         root_candidates, subdir_candidates = await self.find_candidate_files(repo_path)
         all_candidates = root_candidates + subdir_candidates
         candidate_files = [(path, score) for path, _, score in all_candidates][:100]
+        self.logger.debug(
+            f"Detection step 2: {len(root_candidates)} root candidate(s), "
+            f"{len(subdir_candidates)} subdir candidate(s); "
+            f"root={[p for p, _, _ in root_candidates]}, "
+            f"subdir_top={[p for p, _, _ in subdir_candidates[:3]]}"
+        )
 
         # Step 3: Try root-level files first (in score order), then top subdirectory file
         failed_candidates: set[str] = set()
@@ -656,7 +677,8 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
         best_file: str | None = None
         best_file_count: int = 0
 
-        for filename, content, _ in root_candidates:
+        for filename, content, score in root_candidates:
+            self.logger.debug(f"Detection step 3: trying root candidate '{filename}' (score={score})")
             try:
                 result = await self.analyze_and_build_result(filename, content)
                 total_cost += result.total_cost
@@ -690,7 +712,8 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
             self.logger.warning("All root candidates failed, trying AI file detection")
 
         if subdir_candidates:
-            filename, content, _ = subdir_candidates[0]
+            filename, content, score = subdir_candidates[0]
+            self.logger.debug(f"Detection step 3b: trying top subdir candidate '{filename}' (score={score})")
             try:
                 result = await self.analyze_and_build_result(filename, content)
                 total_cost += result.total_cost
@@ -726,10 +749,12 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
             f"Passing {len(ai_input_files)} files to AI for maintainer file detection "
             f"(total repo files: {len(file_names)})"
         )
+        self.logger.debug(f"AI input files: {[f for f, _ in ai_input_files]}")
         ai_file_name, ai_cost = await self.find_maintainer_file_with_ai(ai_input_files)
         ai_suggested_file = ai_file_name
         total_cost += ai_cost
 
+        self.logger.debug(f"AI suggested file: '{ai_file_name}' (cost={ai_cost:.4f})")
         if ai_file_name:
             file_path = os.path.join(repo_path, ai_file_name)
             if not await aiofiles.os.path.isfile(file_path):
@@ -826,7 +851,7 @@ async def process_maintainers(
         ai_cost = 0.0
         maintainers_found = 0
         maintainers_skipped = 0
-        candidate_files: list[str] = []
+        candidate_files: list[tuple[str, int]] = []
         ai_suggested_file: str | None = None
 
         try:

From 1a6a08d93b94845b80078e820febf08e57176e92 Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Thu, 26 Mar 2026 13:25:20 +0100
Subject: [PATCH 15/16] feat: improve README.md handling

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../src/crowdgit/models/maintainer_info.py    |  1 +
 .../services/maintainer/maintainer_service.py | 29 ++++++++++++++-----
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/models/maintainer_info.py b/services/apps/git_integration/src/crowdgit/models/maintainer_info.py
index 1752999e54..9a298c7820 100644
--- a/services/apps/git_integration/src/crowdgit/models/maintainer_info.py
+++ b/services/apps/git_integration/src/crowdgit/models/maintainer_info.py
@@ -36,3 +36,4 @@ class MaintainerResult(BaseModel):
     total_cost: float = 0
     candidate_files: list[tuple[str, int]] = []
     ai_suggested_file: str | None = None
+    not_found: bool = False
diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index 79cd216d9d..455500bb3e 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -67,6 +67,7 @@ class MaintainerService(BaseService):
         ".github/contributors.md",
         ".github/codeowners",
         "security-insights.md",
+        "readme.md",
     }
 
     # Governance stems (basename without extension, lowercased) for filename search
@@ -579,9 +580,11 @@ async def analyze_and_build_result(self, filename: str, content: str) -> Maintai
         Raises MaintanerAnalysisError if no maintainers are found.
         """
         self.logger.info(f"Analyzing maintainer file: {filename}")
-        if "readme" in filename.lower() and "maintainer" not in content.lower():
+        if "readme" in filename.lower() and not any(
+            kw in content.lower() for kw in self.SCORING_KEYWORDS
+        ):
             self.logger.warning(
-                f"Skipping README file '{filename}': no 'maintainer' keyword found in content"
+                f"Skipping README file '{filename}': no governance keyword found in content"
             )
             raise MaintanerAnalysisError(error_code=ErrorCode.NO_MAINTAINER_FOUND)
         result = await self.analyze_file_content(filename, content)
@@ -612,7 +615,9 @@ async def try_saved_maintainer_file(
             )
             return None, cost
 
-        self.logger.debug(f"Saved maintainer file exists, reading content: '{saved_maintainer_file}'")
+        self.logger.debug(
+            f"Saved maintainer file exists, reading content: '{saved_maintainer_file}'"
+        )
         try:
             async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
                 content = await f.read()
@@ -678,7 +683,9 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
         best_file_count: int = 0
 
         for filename, content, score in root_candidates:
-            self.logger.debug(f"Detection step 3: trying root candidate '{filename}' (score={score})")
+            self.logger.debug(
+                f"Detection step 3: trying root candidate '{filename}' (score={score})"
+            )
             try:
                 result = await self.analyze_and_build_result(filename, content)
                 total_cost += result.total_cost
@@ -713,7 +720,9 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
 
         if subdir_candidates:
             filename, content, score = subdir_candidates[0]
-            self.logger.debug(f"Detection step 3b: trying top subdir candidate '{filename}' (score={score})")
+            self.logger.debug(
+                f"Detection step 3b: trying top subdir candidate '{filename}' (score={score})"
+            )
             try:
                 result = await self.analyze_and_build_result(filename, content)
                 total_cost += result.total_cost
@@ -754,7 +763,7 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
         ai_suggested_file = ai_file_name
         total_cost += ai_cost
 
-        self.logger.debug(f"AI suggested file: '{ai_file_name}' (cost={ai_cost:.4f})")
+        self.logger.info(f"AI suggested file: '{ai_file_name}' (cost={ai_cost:.4f})")
         if ai_file_name:
             file_path = os.path.join(repo_path, ai_file_name)
             if not await aiofiles.os.path.isfile(file_path):
@@ -775,7 +784,7 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
                     )
 
         self.logger.error("No maintainer file found")
-        raise MaintainerFileNotFoundError(ai_cost=total_cost)
+        return _attach_metadata(MaintainerResult(total_cost=total_cost, not_found=True))
 
     async def check_if_interval_elapsed(self, repository: Repository) -> tuple[bool, float]:
         """
@@ -870,10 +879,14 @@ async def process_maintainers(
             )
             latest_maintainer_file = maintainers.maintainer_file
             ai_cost = maintainers.total_cost
-            maintainers_found = len(maintainers.maintainer_info)
             candidate_files = maintainers.candidate_files
             ai_suggested_file = maintainers.ai_suggested_file
 
+            if maintainers.not_found:
+                raise MaintainerFileNotFoundError(ai_cost=ai_cost)
+
+            maintainers_found = len(maintainers.maintainer_info)
+
             if repository.parent_repo:
                 filtered_maintainers = await self.exclude_parent_repo_maintainers(
                     repository.parent_repo, maintainers.maintainer_info

From bbacdfa31d1156c862876850f2d93a44fd0ceecc Mon Sep 17 00:00:00 2001
From: Mouad BANI <mouad-mb@outlook.com>
Date: Thu, 26 Mar 2026 13:49:19 +0100
Subject: [PATCH 16/16] fix: undo extensionless files optimization

Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
---
 .../src/crowdgit/services/maintainer/maintainer_service.py   | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
index 455500bb3e..e859d085fa 100644
--- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
+++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py
@@ -446,8 +446,7 @@ async def _list_repo_files(self, repo_path: str) -> list[str]:
         """List non-code files in the repo recursively, filtered by VALID_EXTENSIONS."""
         glob_args = ["--glob", "!.git/"]
         for ext in self.VALID_EXTENSIONS:
-            if ext:
-                glob_args.extend(["--iglob", f"*{ext}"])
+            glob_args.extend(["--iglob", f"*{ext}"])
 
         output = await run_shell_command(
             ["rg", "--files", "--hidden", *glob_args, "."], cwd=repo_path
@@ -455,7 +454,7 @@ async def _list_repo_files(self, repo_path: str) -> list[str]:
         return [
             line[2:] if line.startswith("./") else line
             for line in output.strip().split("\n")
-            if line.strip() and os.path.splitext(line)[1] in self.VALID_EXTENSIONS
+            if line.strip()
         ]
 
     async def _ripgrep_search(self, repo_path: str, max_depth: int | None = None) -> list[str]: