summaryrefslogtreecommitdiff
path: root/src/util/language.rs
diff options
context:
space:
mode:
author: A Farzat <a@farzat.xyz> 2026-06-07 21:12:50 +0300
committer: A Farzat <a@farzat.xyz> 2026-06-07 21:12:50 +0300
commit: b3fe9879da5386df853c4c5d583eb040341195da (patch)
tree: 5cd488e3253eb0ded8194b00908217b7331999ea /src/util/language.rs
parent: 133dba2bc968c9081144887ceb19bd9711c16df6 (diff)
download: repo2markdown-b3fe9879da5386df853c4c5d583eb040341195da.tar.gz
repo2markdown-b3fe9879da5386df853c4c5d583eb040341195da.zip
Detect sh as bash
This is based on the assumption that LLMs recognize `bash` code fences more reliably than `sh` fences, because they have seen more bash-labelled code during training. A second assumption is that LLMs will not care much about the difference between sh and bash unless explicitly instructed to.
Diffstat (limited to 'src/util/language.rs')
-rw-r--r-- src/util/language.rs 14
1 files changed, 13 insertions, 1 deletions
diff --git a/src/util/language.rs b/src/util/language.rs
index 8819a1e..8da45b8 100644
--- a/src/util/language.rs
+++ b/src/util/language.rs
@@ -27,7 +27,7 @@ fn detect_from_shebang(contents: &str) -> Option<&'static str> {
if first_line.contains("python") {
return Some("python");
}
- if first_line.contains("bash") {
+ if first_line.contains("bash") || first_line.contains("/sh") || first_line.contains(" sh") {
return Some("bash");
}
}
@@ -51,4 +51,16 @@ mod tests {
let contents = "#!/usr/bin/python3\nprint('hi')";
assert_eq!(detect_language(Path::new("file"), contents), "python");
}
+
+ #[test]
+ fn sh_shebang_maps_to_bash_for_llm() {
+ let contents = "#!/bin/sh\necho hi";
+ assert_eq!(detect_language(Path::new("file"), contents), "bash");
+ }
+
+ #[test]
+ fn env_shebang_is_detected() {
+ let contents = "#!/usr/bin/env sh\necho hi";
+ assert_eq!(detect_language(Path::new("file"), contents), "bash");
+ }
}