summary | refs | log | tree | commit | diff
path: root/src/util
diff options
context:
space:
mode:
author A Farzat <a@farzat.xyz> 2026-06-07 21:12:50 +0300
committer A Farzat <a@farzat.xyz> 2026-06-07 21:12:50 +0300
commit b3fe9879da5386df853c4c5d583eb040341195da (patch)
tree 5cd488e3253eb0ded8194b00908217b7331999ea /src/util
parent 133dba2bc968c9081144887ceb19bd9711c16df6 (diff)
download repo2markdown-b3fe9879da5386df853c4c5d583eb040341195da.tar.gz
repo2markdown-b3fe9879da5386df853c4c5d583eb040341195da.zip
Detect sh as bash
This is based on the assumption that LLMs recognize `bash` language fences more reliably than `sh` fences, because `bash` fences appear more often in their training data. A second assumption is that LLMs do not care much about the difference between sh and bash unless explicitly instructed to.
Diffstat (limited to 'src/util')
-rw-r--r-- src/util/language.rs 14
1 file changed, 13 insertions, 1 deletion
diff --git a/src/util/language.rs b/src/util/language.rs
index 8819a1e..8da45b8 100644
--- a/src/util/language.rs
+++ b/src/util/language.rs
@@ -27,7 +27,7 @@ fn detect_from_shebang(contents: &str) -> Option<&'static str> {
if first_line.contains("python") {
return Some("python");
}
- if first_line.contains("bash") {
+ if first_line.contains("bash") || first_line.contains("/sh") || first_line.contains(" sh") {
return Some("bash");
}
}
@@ -51,4 +51,16 @@ mod tests {
let contents = "#!/usr/bin/python3\nprint('hi')";
assert_eq!(detect_language(Path::new("file"), contents), "python");
}
+
+ #[test]
+ fn sh_shebang_maps_to_bash_for_llm() {
+ let contents = "#!/bin/sh\necho hi";
+ assert_eq!(detect_language(Path::new("file"), contents), "bash");
+ }
+
+ #[test]
+ fn env_shebang_is_detected() {
+ let contents = "#!/usr/bin/env sh\necho hi";
+ assert_eq!(detect_language(Path::new("file"), contents), "bash");
+ }
}