diff options
| author | A Farzat <a@farzat.xyz> | 2026-06-07 21:12:50 +0300 |
|---|---|---|
| committer | A Farzat <a@farzat.xyz> | 2026-06-07 21:12:50 +0300 |
| commit | b3fe9879da5386df853c4c5d583eb040341195da (patch) | |
| tree | 5cd488e3253eb0ded8194b00908217b7331999ea /src/util | |
| parent | 133dba2bc968c9081144887ceb19bd9711c16df6 (diff) | |
| download | repo2markdown-b3fe9879da5386df853c4c5d583eb040341195da.tar.gz repo2markdown-b3fe9879da5386df853c4c5d583eb040341195da.zip | |
Detect sh as bash
This is based on the assumption that LLMs recognize `bash` language fences
more reliably, since such fences appear more often in their training data.
Another assumption is that LLMs do not care much about the difference
between sh and bash unless explicitly instructed to.
Diffstat (limited to 'src/util')
| -rw-r--r-- | src/util/language.rs | 14 |
1 file changed, 13 insertions, 1 deletion
```diff
diff --git a/src/util/language.rs b/src/util/language.rs
index 8819a1e..8da45b8 100644
--- a/src/util/language.rs
+++ b/src/util/language.rs
@@ -27,7 +27,7 @@ fn detect_from_shebang(contents: &str) -> Option<&'static str> {
         if first_line.contains("python") {
             return Some("python");
         }
-        if first_line.contains("bash") {
+        if first_line.contains("bash") || first_line.contains("/sh") || first_line.contains(" sh") {
             return Some("bash");
         }
     }
@@ -51,4 +51,16 @@ mod tests {
         let contents = "#!/usr/bin/python3\nprint('hi')";
         assert_eq!(detect_language(Path::new("file"), contents), "python");
     }
+
+    #[test]
+    fn sh_shebang_maps_to_bash_for_llm() {
+        let contents = "#!/bin/sh\necho hi";
+        assert_eq!(detect_language(Path::new("file"), contents), "bash");
+    }
+
+    #[test]
+    fn env_shebang_is_detected() {
+        let contents = "#!/usr/bin/env sh\necho hi";
+        assert_eq!(detect_language(Path::new("file"), contents), "bash");
+    }
 }
```
