ruka_mir/
naming.rs

1//! Stable naming helpers used by MIR browser graphs and backends.
2
3use std::collections::HashSet;
4
5use cranelift_entity::EntityRef;
6use heck::ToSnakeCase;
7use unicode_ident::{is_xid_continue, is_xid_start};
8
9use crate::{LocalKind, MirFuncId, MirFunction, MirLocalId, MirProgram};
10
/// Synthesized function identifiers for one MIR program.
///
/// Holds one identifier per function; lookups index this table with
/// the function id's `index()`.
#[derive(Clone, Debug)]
pub struct ProgramNames {
    // One entry per function, pushed in the iteration order of `program.functions`.
    function_idents: Vec<String>,
}
16
17impl ProgramNames {
18    /// Build a stable set of function identifiers for a MIR program.
19    pub fn from_program(program: &MirProgram) -> Self {
20        let mut used = HashSet::new();
21        let mut function_idents = Vec::with_capacity(program.functions.len());
22        for (func_id, function) in program.functions.iter() {
23            let base = if function.name.is_empty() {
24                format!("fn_{}", func_id.index())
25            } else {
26                format!("fn_{}", mangle_canonical_ident(&function.name))
27            };
28            let ident = unique_name(&base, &mut used);
29            function_idents.push(ident);
30        }
31        Self { function_idents }
32    }
33
34    /// Return the emitted identifier for a MIR function id.
35    pub fn function_ident(&self, func_id: MirFuncId) -> &str {
36        &self.function_idents[func_id.index()]
37    }
38}
39
/// Synthesized local identifiers for a single MIR function.
///
/// Holds one identifier per local; lookups index this table with the
/// local id's `index()`.
#[derive(Clone, Debug)]
pub struct FunctionNames {
    // One entry per local, pushed in the iteration order of `function.locals`.
    local_idents: Vec<String>,
}
45
46impl FunctionNames {
47    /// Build stable local identifiers for a MIR function.
48    pub fn from_function(function: &MirFunction) -> Self {
49        let mut used = HashSet::new();
50        let mut local_idents = Vec::with_capacity(function.locals.len());
51
52        for (local_id, info) in function.locals.iter() {
53            let base = match info.kind {
54                LocalKind::Param => match &info.debug_name {
55                    Some(name) if !name.is_empty() => format!("p_{name}"),
56                    _ => format!("p_{}", local_id.index()),
57                },
58                LocalKind::Binding => match &info.debug_name {
59                    Some(name) if !name.is_empty() => format!("v_{name}"),
60                    _ => format!("v_{}", local_id.index()),
61                },
62                LocalKind::Temp => match &info.debug_name {
63                    Some(name) if !name.is_empty() => format!("t_{name}"),
64                    _ => format!("t_{}", local_id.index()),
65                },
66            };
67
68            let ident = unique_name(&sanitize_ident(&base, "t_"), &mut used);
69            local_idents.push(ident);
70        }
71
72        Self { local_idents }
73    }
74
75    /// Return the emitted identifier for a MIR local id.
76    pub fn local_ident(&self, local_id: MirLocalId) -> &str {
77        &self.local_idents[local_id.index()]
78    }
79
80    /// Build an identifier for an incoming block parameter binding.
81    pub fn incoming_param_ident(&self, local_id: MirLocalId, index: usize) -> String {
82        format!("in_{}_{}", self.local_ident(local_id), index)
83    }
84}
85
86/// Build a valid synthetic temporary identifier for generated Rust.
87///
88/// ```
89/// let ident = ruka_mir::naming::synthetic_temp_ident("match-value");
90/// assert_eq!(ident, "t_match_value");
91/// ```
92pub fn synthetic_temp_ident(stem: &str) -> String {
93    sanitize_ident(&format!("t_{stem}"), "t")
94}
95
/// Mangle a canonical source-level identifier into a Rust-compatible name.
///
/// ASCII letters and digits are copied through unchanged. Everything else is
/// rewritten with an underscore-led escape:
///
/// | input | output |
/// |-------|--------|
/// | `_`   | `__`   |
/// | `[`   | `_l`   |
/// | `]`   | `_r`   |
/// | `;`   | `_m`   |
/// | `::`  | `_d`   |
/// | `, `  | `_c`   |
///
/// # Panics
///
/// Panics when `name` is empty, when it starts with an ASCII digit, when a
/// `:` is not immediately followed by another `:`, when a `,` is not
/// immediately followed by a space, or when any other character outside the
/// set above appears.
pub fn mangle_canonical_ident(name: &str) -> String {
    let mut mangled = String::new();
    let mut rest = name.chars().peekable();

    while let Some(ch) = rest.next() {
        let escape = match ch {
            plain if plain.is_ascii_alphanumeric() => {
                mangled.push(plain);
                continue;
            }
            '_' => "__",
            '[' => "_l",
            ']' => "_r",
            ';' => "_m",
            // `::` and `, ` are two-character tokens: consume the second
            // character only when it is the expected one.
            ':' => match rest.next_if_eq(&':') {
                Some(_) => "_d",
                None => {
                    panic!("unsupported character in canonical name for mangling: standalone `:`")
                }
            },
            ',' => match rest.next_if_eq(&' ') {
                Some(_) => "_c",
                None => {
                    panic!("unsupported character in canonical name for mangling: expected `, `")
                }
            },
            _ => panic!("unsupported character in canonical name for mangling: `{ch}`"),
        };
        mangled.push_str(escape);
    }

    // Validated after the scan so an unsupported character is reported
    // before an empty or digit-led name.
    match mangled.bytes().next() {
        None => panic!("cannot mangle empty canonical name"),
        Some(first) if first.is_ascii_digit() => {
            panic!("canonical name cannot start with a digit: `{name}`")
        }
        Some(_) => mangled,
    }
}
136
/// Reserve and return a name derived from `base` that is not yet in `used`.
///
/// `base` itself is returned when it is still free. Otherwise `base_2`,
/// `base_3`, ... are tried in order until one is free. The returned name is
/// recorded in `used` before returning.
fn unique_name(base: &str, used: &mut HashSet<String>) -> String {
    let mut candidate = base.to_owned();
    let mut next_suffix = 2usize;

    // `insert` returns false when the name is already taken, so a failed
    // insert moves on to the next numbered variant.
    while !used.insert(candidate.clone()) {
        candidate = format!("{base}_{next_suffix}");
        next_suffix += 1;
    }

    candidate
}
151
152fn sanitize_ident(raw: &str, fallback_prefix: &str) -> String {
153    let snake = raw.to_snake_case();
154    let mut out = String::new();
155
156    for (index, ch) in snake.chars().enumerate() {
157        if index == 0 {
158            if is_xid_start(ch) || ch == '_' {
159                out.push(ch);
160            } else if is_xid_continue(ch) {
161                out.push('_');
162                out.push(ch);
163            } else {
164                out.push('_');
165            }
166        } else if is_xid_continue(ch) {
167            out.push(ch);
168        } else {
169            out.push('_');
170        }
171    }
172
173    if out.is_empty() || out == "_" {
174        out = fallback_prefix.to_owned();
175    }
176
177    if is_rust_keyword(&out) {
178        out.push('_');
179    }
180
181    out
182}
183
/// Report whether `ident` is spelled exactly like a Rust keyword.
///
/// Covers the strict keywords, the keywords added in the 2018 edition
/// (`async`, `await`, `dyn`) and the keywords reserved for future use,
/// including `gen`, which is reserved from the 2024 edition. Weak keywords
/// such as `union` are left out because they are still valid identifiers.
/// The comparison is case-sensitive, so `self` and `Self` are both listed.
fn is_rust_keyword(ident: &str) -> bool {
    matches!(
        ident,
        // Strict keywords.
        "as" | "break"
            | "const"
            | "continue"
            | "crate"
            | "else"
            | "enum"
            | "extern"
            | "false"
            | "fn"
            | "for"
            | "if"
            | "impl"
            | "in"
            | "let"
            | "loop"
            | "match"
            | "mod"
            | "move"
            | "mut"
            | "pub"
            | "ref"
            | "return"
            | "self"
            | "Self"
            | "static"
            | "struct"
            | "super"
            | "trait"
            | "true"
            | "type"
            | "unsafe"
            | "use"
            | "where"
            | "while"
            // Strict keywords since the 2018 edition.
            | "async"
            | "await"
            | "dyn"
            // Reserved for future use.
            | "abstract"
            | "become"
            | "box"
            | "do"
            | "final"
            | "gen"
            | "macro"
            | "override"
            | "priv"
            | "try"
            | "typeof"
            | "unsized"
            | "virtual"
            | "yield"
    )
}
239
#[cfg(test)]
mod tests {
    use super::mangle_canonical_ident;

    /// Bracket, `, ` and `::` tokens each map to their two-character escape,
    /// and a literal underscore is doubled.
    #[test]
    fn mangles_canonical_names_with_pair_escapes() {
        let cases = [
            ("Pair[i64, String]", "Pair_li64_cString_r"),
            ("Boxed[i64]::clone_from", "Boxed_li64_r_dclone__from"),
        ];

        for (canonical, expected) in cases {
            assert_eq!(mangle_canonical_ident(canonical), expected);
        }
    }

    /// A comma must be followed by a space; anything else is rejected.
    #[test]
    #[should_panic(expected = "expected `, `")]
    fn rejects_comma_without_space() {
        mangle_canonical_ident("Pair[i64,String]");
    }
}