summaryrefslogtreecommitdiff
path: root/src/util/language.rs
blob: c9bd1ead2cfa0c4b6610716a84f3a61ab4ad5b56 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
use std::path::Path;

/// Returns a language identifier for a file, or `""` when none can be determined.
///
/// The file extension is consulted first; the shebang line of `contents` is
/// only examined when the extension yields no answer.
pub fn detect_language(filename: &Path, contents: &str) -> &'static str {
    match detect_from_extension(filename) {
        Some(language) => language,
        // No usable extension: fall back to the shebang, then to "unknown" ("").
        None => detect_from_shebang(contents).unwrap_or(""),
    }
}

/// Maps the extension of `filename` to a language identifier.
///
/// The comparison is ASCII case-insensitive. Returns `None` when the path has
/// no extension, the extension is not valid UTF-8, or it is not a known one.
fn detect_from_extension(filename: &Path) -> Option<&'static str> {
    let raw_extension = filename.extension()?.to_str()?;
    let language = match raw_extension.to_ascii_lowercase().as_str() {
        "rs" => "rust",
        "py" => "python",
        "json" => "json",
        "toml" => "toml",
        "yaml" | "yml" => "yaml",
        "js" | "jsx" => "javascript",
        "ts" | "tsx" => "typescript",
        _ => return None,
    };
    Some(language)
}

/// Guesses the language from a `#!` interpreter line at the start of `contents`.
///
/// Only the first line is inspected, and only when it begins with `#!`. The
/// interpreter is identified by the file name of the program being run rather
/// than by searching the whole line, so unrelated path components or arguments
/// (for example `/usr/share/...` or a directory named `python-tools`) do not
/// produce false matches.
///
/// When the program is `env`, the first following token that is neither an
/// option (`-...`) nor a variable assignment (`NAME=value`) is taken as the
/// interpreter. `env` options that consume a separate argument are not
/// understood.
///
/// Returns `Some("python")` for interpreters whose name starts with `python`
/// (`python`, `python3`, `python3.12`, ...), `Some("bash")` for `sh` and
/// `bash`, and `None` otherwise.
fn detect_from_shebang(contents: &str) -> Option<&'static str> {
    /// Returns the final path component of `token` (the token itself if it has no `/`).
    fn program_name(token: &str) -> &str {
        token.rsplit('/').next().unwrap_or(token)
    }

    let first_line = contents.lines().next()?;
    let command = first_line.strip_prefix("#!")?;

    // `split_whitespace` also tolerates the `#! /bin/sh` spelling.
    let mut tokens = command.split_whitespace();
    let mut interpreter = program_name(tokens.next()?);
    if interpreter == "env" {
        // `env [-opts] [NAME=value]... program [args]`: skip to the program.
        interpreter = tokens
            .find(|token| !token.starts_with('-') && !token.contains('='))
            .map(program_name)?;
    }

    match interpreter {
        // Plain `sh` scripts are reported as "bash" as well.
        "sh" | "bash" => Some("bash"),
        name if name.starts_with("python") => Some("python"),
        _ => None,
    }
}

#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::detect_language;

    /// Runs detection for a file called `filename` holding `contents`.
    fn detect(filename: &str, contents: &str) -> &'static str {
        detect_language(Path::new(filename), contents)
    }

    /// Checks every `(filename, expected)` pair using empty file contents, so
    /// only the extension can drive the result.
    fn assert_detected_by_extension(cases: &[(&str, &str)]) {
        for &(filename, expected) in cases {
            assert_eq!(detect(filename, ""), expected);
        }
    }

    // --- Shebang-based detection (file name has no extension) ---

    #[test]
    fn non_shebang_line_does_not_trigger_detection() {
        assert_eq!(detect("file", "this mentions bash but is not a shebang"), "");
    }

    #[test]
    fn detects_python_from_shebang() {
        assert_eq!(detect("file", "#!/usr/bin/python3\nprint('hi')"), "python");
    }

    #[test]
    fn sh_shebang_maps_to_bash_for_llm() {
        assert_eq!(detect("file", "#!/bin/sh\necho hi"), "bash");
    }

    #[test]
    fn env_shebang_is_detected() {
        assert_eq!(detect("file", "#!/usr/bin/env sh\necho hi"), "bash");
    }

    // --- Extension-based detection ---

    #[test]
    fn extension_overrides_shebang() {
        // The `.py` extension wins even though the shebang names bash.
        assert_eq!(detect("main.py", "#!/bin/bash\nprint('hello')"), "python");
    }

    #[test]
    fn javascript_family_extensions_map_correctly() {
        assert_detected_by_extension(&[
            ("file.js", "javascript"),
            ("file.jsx", "javascript"),
            ("file.ts", "typescript"),
            ("file.tsx", "typescript"),
        ]);
    }

    #[test]
    fn json_derived_extensions_map_correctly() {
        assert_detected_by_extension(&[
            ("file.json", "json"),
            ("file.yml", "yaml"),
            ("file.yaml", "yaml"),
        ]);
    }

    #[test]
    fn toml_extension_is_detected() {
        assert_detected_by_extension(&[("Cargo.toml", "toml")]);
    }
}