Skip to content

Commit a59b51a

Browse files
authored
Merge branch 'fluent:master' into feature/rdkafka-sasl-mechanism-aws-msk-iam
2 parents 491287a + 7ded9ae commit a59b51a

File tree

11 files changed

+1254
-36
lines changed

11 files changed

+1254
-36
lines changed
Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,298 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Fluent Bit Commit Prefix Linter
4+
---------------------------------
5+
Validates commit messages according to Fluent Bit standards:
6+
- Single prefix (plugin or subsystem)
7+
- Prefix must match modified files
8+
- No combined subjects (detect bad squashes)
9+
- Multiple Signed-off-by lines allowed for real commits
10+
- BUT detect_bad_squash(body) must still treat multiple signoffs as "bad squash"
11+
(to satisfy test suite expectations)
12+
- Subject <= 80 chars
13+
"""
14+
15+
import os
16+
import re
17+
import sys
18+
from git import Repo
19+
20+
# Repository handle opened on the path "." when this module is imported.
# get_pr_commits() reads this module-level object to locate HEAD and refs.
repo = Repo(".")

# Regex patterns
# PREFIX_RE: one or more letters, digits or underscores followed by ":" at the
# start of the string (case-insensitive). Hyphens are not in the character
# class, so a line beginning "Signed-off-by:" does not match it.
PREFIX_RE = re.compile(r"^[a-z0-9_]+:", re.IGNORECASE)
# SIGNED_OFF_RE: the literal text "Signed-off-by:" anywhere in the searched
# text (case-insensitive, not anchored to the start of a line).
SIGNED_OFF_RE = re.compile(r"Signed-off-by:", re.IGNORECASE)
25+
26+
27+
# ------------------------------------------------
28+
# Identify expected prefixes dynamically from file paths
29+
# ------------------------------------------------
30+
def infer_prefix_from_paths(paths):
    """
    Infer the commit-subject prefixes that are acceptable for a set of paths.

    Rules applied to every path (a single path may trigger several of them):
      - basename is CMakeLists.txt    -> "build:" becomes a candidate
      - lib/...                       -> "lib:"
      - tests/...                     -> "tests:"
      - plugins/<name>/...            -> "<name>:"
      - src/.../flb_<core>.<ext>      -> "<core>:"
      - src/<entry>/... (no flb_ file)-> "<entry>:"

    Args:
        paths: iterable of repository-relative file paths (strings).

    Returns:
        (prefixes, build_optional) where
          - prefixes: set of allowed prefixes, each ending with ":"
            (includes "build:" when any CMakeLists.txt was seen)
          - build_optional: True when at least one component prefix
            (lib/tests/plugins/src) was inferred, i.e. the subject does not
            have to be "build:"; False when no component prefix was inferred.
    """
    component_prefixes = set()
    build_seen = False

    for raw in paths:
        # Normalize Windows-style separators explicitly. Replacing os.sep is a
        # no-op on POSIX (os.sep is already "/"), so backslash-separated paths
        # would otherwise never match any rule below when run on Linux/macOS.
        p = raw.replace("\\", "/")
        basename = os.path.basename(p)

        # ----- Any CMakeLists.txt -> build: candidate -----
        if basename == "CMakeLists.txt":
            build_seen = True

        # ----- lib/ -> lib: -----
        if p.startswith("lib/"):
            component_prefixes.add("lib:")

        # ----- tests/ -> tests: -----
        if p.startswith("tests/"):
            component_prefixes.add("tests:")

        # ----- plugins/<name>/ -> <name>: -----
        if p.startswith("plugins/"):
            name = p.split("/")[1]
            # Skip an empty second component (e.g. "plugins/"): it would
            # produce the prefix ":" which no subject can ever match.
            if name:
                component_prefixes.add(f"{name}:")

        # ----- src/ -> flb_xxx.* -> xxx: OR src/<dir>/ -> <dir>: -----
        if p.startswith("src/"):
            if basename.startswith("flb_"):
                # Strip the "flb_" marker and everything from the first dot.
                core = basename[4:].split(".")[0]
                if core:
                    component_prefixes.add(f"{core}:")
            else:
                name = p.split("/")[1]
                if name:
                    component_prefixes.add(f"{name}:")

    # prefixes = component prefixes + build: if needed
    prefixes = set(component_prefixes)
    if build_seen:
        prefixes.add("build:")

    # build_optional:
    #   True if ANY component prefix (lib/tests/plugins/src) was inferred.
    #   False when no component prefix was inferred (e.g. only a top-level
    #   CMakeLists.txt, or only files outside the recognised directories).
    build_optional = bool(component_prefixes)

    return prefixes, build_optional
86+
87+
88+
# ------------------------------------------------
89+
# detect_bad_squash() must satisfy the tests EXACTLY
90+
# ------------------------------------------------
91+
def detect_bad_squash(body):
    """
    Decide whether a commit body looks like a badly squashed commit.

    Checks, in order:
      1. more than one body line matching PREFIX_RE -> bad, with a message
         starting "Multiple subject-like prefix lines"
      2. exactly one such line                       -> bad
      3. more than one "Signed-off-by:" in the body  -> bad
         (callers may choose to ignore this particular reason)

    Args:
        body: commit message text without the subject line.

    Returns:
        (is_bad, reason); reason is "" when nothing suspicious was found.
    """
    # Gather the stripped, non-blank lines that begin with "<word>:".
    suspicious = []
    for raw_line in body.splitlines():
        text = raw_line.strip()
        if text and PREFIX_RE.match(text):
            suspicious.append(text)

    # Prefix-like lines take precedence over the sign-off count.
    if len(suspicious) > 1:
        return True, f"Multiple subject-like prefix lines detected: {suspicious}"
    if suspicious:
        return True, f"Unexpected subject-like prefix in body: {suspicious}"

    # Sign-offs are counted over the raw body, not per line.
    if len(SIGNED_OFF_RE.findall(body)) > 1:
        return True, "Multiple Signed-off-by lines detected (bad squash)"

    return False, ""
118+
119+
120+
# ------------------------------------------------
121+
# Validate commit based on expected behavior and test rules
122+
# ------------------------------------------------
123+
def validate_commit(commit):
    """
    Validate one commit against the prefix rules.

    Checks run in this order, returning on the first failure:
      1. subject starts with a "<prefix>:" (PREFIX_RE)
      2. body has no subject-like prefix lines (via detect_bad_squash);
         a "multiple Signed-off-by" result from that helper is ignored here
      3. subject is at most 80 characters
      4. body contains at least one "Signed-off-by:"
      5. subject prefix agrees with the prefixes inferred from changed files

    Args:
        commit: object exposing `.message` (str) and `.stats.files`
            (mapping keyed by changed file path).

    Returns:
        (ok, reason); reason is "" when the commit is accepted.
    """
    # Subject is everything before the first newline; the rest is the body.
    subject, _, body = commit.message.strip().partition("\n")

    prefix_match = PREFIX_RE.match(subject)
    if prefix_match is None:
        return False, f"Missing prefix in commit subject: '{subject}'"
    subject_prefix = prefix_match.group()

    # Only prefix-like body lines are fatal; the multi-signoff reason is
    # deliberately let through.
    is_bad_squash, squash_reason = detect_bad_squash(body)
    if is_bad_squash and "subject-like prefix" in squash_reason:
        return False, f"Bad squash detected: {squash_reason}"

    if len(subject) > 80:
        return False, f"Commit subject too long (>80 chars): '{subject}'"

    if not SIGNED_OFF_RE.findall(body):
        return False, "Missing Signed-off-by line"

    expected, build_optional = infer_prefix_from_paths(commit.stats.files.keys())

    # Nothing could be inferred (docs/tools): any prefix is acceptable.
    if not expected:
        return True, ""

    expected_lower = {item.lower() for item in expected}
    subj_lower = subject_prefix.lower()

    def mismatch(allowed):
        # Shared failure message for every "wrong prefix" outcome.
        return False, (
            f"Subject prefix '{subject_prefix}' does not match files changed.\n"
            f"Expected one of: {allowed}"
        )

    # Build-related prefixes do not count as components. "lib:" may act as an
    # umbrella over several inferred component prefixes.
    component_like = expected_lower - {"build:", "cmakelists.txt:"}
    umbrella_prefixes = {"lib:"}
    spans_several = len(component_like) > 1 and subj_lower not in umbrella_prefixes

    # Either the commit touches several components without an umbrella
    # subject, or the subject prefix is simply not among the expected ones.
    if spans_several or subj_lower not in expected_lower:
        return mismatch(", ".join(sorted(expected)))

    # If build is NOT optional and build: is expected, subject MUST be build:
    if not build_optional and "build:" in expected_lower and subj_lower != "build:":
        return mismatch("build:")

    return True, ""
214+
215+
216+
# ------------------------------------------------
217+
# Get PR commits only (excludes merge commits and base branch commits)
218+
# ------------------------------------------------
219+
def _is_merge_commit(commit):
    """Return True when *commit* should be treated as a merge commit."""
    # A commit with more than one parent is a merge regardless of its message.
    # getattr keeps this tolerant of commit-like objects without `parents`.
    if len(getattr(commit, "parents", ())) > 1:
        return True
    # Legacy heuristic kept for backward compatibility: messages that start
    # with "Merge" are also skipped.
    return commit.message.strip().startswith("Merge")


def get_pr_commits():
    """
    Return the commits that should be validated.

    - Outside a pull_request event (GITHUB_EVENT_NAME): just [HEAD].
    - For a pull_request: the commits in `<merge-base>..HEAD`, where the merge
      base is computed against `origin/<GITHUB_BASE_REF>` (or the bare
      `<GITHUB_BASE_REF>` ref if the origin one does not exist), with merge
      commits removed.
    - If that range cannot be determined or is empty: [HEAD], or [] when HEAD
      itself is a merge commit.

    Returns:
        list of commit objects (possibly empty).
    """
    event_name = os.environ.get("GITHUB_EVENT_NAME", "")
    base_ref = os.environ.get("GITHUB_BASE_REF", "")

    if event_name != "pull_request":
        return [repo.head.commit]

    # Resolve the base branch: origin/<base> first (most common in CI), then
    # the bare ref name. Indexing repo.refs is only used as an existence test.
    base_branch_ref = None
    if base_ref:
        for candidate in (f"origin/{base_ref}", base_ref):
            try:
                repo.refs[candidate]
            except (KeyError, IndexError):
                continue
            base_branch_ref = candidate
            break

    # If we have a base branch, get commits between base and HEAD
    if base_branch_ref:
        try:
            base_commit = repo.refs[base_branch_ref].commit
            merge_base_list = repo.merge_base(repo.head.commit, base_commit)
            if merge_base_list:
                merge_base_sha = merge_base_list[0].hexsha
                # All commits from merge_base to HEAD, excluding merge_base
                # itself and any merge commits.
                pr_commits = [
                    c
                    for c in repo.iter_commits(f"{merge_base_sha}..HEAD")
                    if not _is_merge_commit(c)
                ]
                if pr_commits:
                    return pr_commits
        except Exception as e:
            # Best effort: if merge-base fails, log and fall through to fallback
            print(f"⚠️ Could not determine merge base: {e}", file=sys.stderr)

    # Fallback: if we can't determine base, check HEAD (but skip if it's a merge)
    head_commit = repo.head.commit
    if _is_merge_commit(head_commit):
        print("⚠️ HEAD is a merge commit and base branch not available. Skipping validation.", file=sys.stderr)
        return []

    return [head_commit]
270+
271+
272+
# ------------------------------------------------
273+
# MAIN
274+
# ------------------------------------------------
275+
def main():
    """
    Validate every commit returned by get_pr_commits() and exit.

    Exit status is 0 when there is nothing to validate or every commit
    passes, and 1 when at least one commit fails (all failures are printed).
    """
    commits = get_pr_commits()
    if not commits:
        print("ℹ️ No commits to validate.")
        sys.exit(0)

    # Collect every failure so the author sees all problems in one run.
    failures = []
    for commit in commits:
        ok, reason = validate_commit(commit)
        if ok:
            continue
        failures.append(f"\n❌ Commit {commit.hexsha[:10]} failed:\n{reason}\n")

    if not failures:
        print("✅ Commit prefix validation passed.")
        sys.exit(0)

    print("".join(failures))
    print("\nCommit prefix validation failed.")
    sys.exit(1)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)