-
Notifications
You must be signed in to change notification settings - Fork 9
Syntect highlighting experiment #73
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
537364c
820688e
9e63cbe
7395f56
6fb54e1
17068a7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,252 @@ | ||
| use anyhow::{anyhow, Result}; | ||
| use crossterm::style; | ||
| use syntect::{ | ||
| easy::HighlightLines, | ||
| highlighting::{Color, FontStyle, Style, Theme, ThemeSet}, | ||
| parsing::{SyntaxReference, SyntaxSet}, | ||
| util::as_24_bit_terminal_escaped, | ||
| }; | ||
|
|
||
| use crate::hunk::{DiffLineType, Hunk}; | ||
|
|
||
| // Diff styles | ||
| // TODO(cptp): make colors configurable | ||
|
|
||
| const HEADER_MARKER_STYLE: Style = Style { | ||
| foreground: Color::WHITE, | ||
| background: Color::BLACK, | ||
| font_style: FontStyle::empty(), | ||
| }; | ||
|
|
||
| const MARKER_ADDED: &str = "\u{258c}"; | ||
| const MARKER_ADDED_STYLE: Style = Style { | ||
| foreground: Color { | ||
| r: 0x78, | ||
| g: 0xde, | ||
| b: 0x0c, | ||
| a: 0xff, | ||
| }, | ||
| background: Color { | ||
| r: 0x0a, | ||
| g: 0x28, | ||
| b: 0x00, | ||
| a: 0xff, | ||
| }, | ||
| font_style: FontStyle::empty(), | ||
| }; | ||
|
|
||
| const MARKER_REMOVED: &str = "\u{258c}"; | ||
| const MARKER_REMOVED_STYLE: Style = Style { | ||
| foreground: Color { | ||
| r: 0xd3, | ||
| g: 0x2e, | ||
| b: 0x09, | ||
| a: 0xff, | ||
| }, | ||
| background: Color { | ||
| r: 0x3f, | ||
| g: 0x0e, | ||
| b: 0x00, | ||
| a: 0xff, | ||
| }, | ||
| font_style: FontStyle::empty(), | ||
| }; | ||
|
|
||
| // const BG_ADDED_STRONG: Color = Color { | ||
| // r: 0x11, | ||
| // g: 0x4f, | ||
| // b: 0x05, | ||
| // a: 0xff, | ||
| // }; | ||
| // const BG_REMOVED_STRONG: Color = Color { | ||
| // r: 0x77, | ||
| // g: 0x23, | ||
| // b: 0x05, | ||
| // a: 0xff, | ||
| // }; | ||
|
|
||
| /// Highlighter to use for | ||
| #[derive(Debug)] | ||
| pub enum DiffHighlighter { | ||
| Simple { | ||
| color_added: crossterm::style::Color, | ||
| color_removed: crossterm::style::Color, | ||
| }, | ||
| Syntect { | ||
| syntax_set: SyntaxSet, | ||
| theme: Box<Theme>, | ||
| }, | ||
| } | ||
|
|
||
| // "base16-eighties.dark" | ||
| impl DiffHighlighter { | ||
| pub fn syntect(theme_name: &str) -> Result<Self> { | ||
| let theme_set = ThemeSet::load_defaults(); | ||
| Ok(Self::Syntect { | ||
| syntax_set: SyntaxSet::load_defaults_newlines(), | ||
| theme: Box::new( | ||
| theme_set | ||
| .themes | ||
| .get(theme_name) | ||
| .ok_or_else(|| anyhow!("Theme '{theme_name}' not found."))? | ||
| .clone(), | ||
| ), | ||
| }) | ||
| } | ||
|
|
||
| // FIXME: it's a bit odd that this is passed as an extra option. | ||
| // Maybe use a "specialized" enum instead that wraps it with the sytect options. | ||
| /// Get file type specific syntax for syntect highlighting. | ||
| /// | ||
| /// Returns None if the `DiffHighlighter::Simple` is used. | ||
| pub fn get_syntax(&self, path: &str) -> Option<&SyntaxReference> { | ||
| match self { | ||
| Self::Simple { .. } => None, | ||
| Self::Syntect { syntax_set, .. } => { | ||
| // TODO: probably better to use std::path? | ||
| let file_ext = path.rsplit('.').next().unwrap_or(""); | ||
| Some( | ||
| syntax_set | ||
| .find_syntax_by_extension(file_ext) | ||
| .unwrap_or_else(|| syntax_set.find_syntax_plain_text()), | ||
| ) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| pub fn highlight_hunk( | ||
| hunk: &Hunk, | ||
| hl: &DiffHighlighter, | ||
| syntax: Option<&SyntaxReference>, | ||
| ) -> String { | ||
| match hl { | ||
| DiffHighlighter::Simple { | ||
| color_added, | ||
| color_removed, | ||
| } => highlight_hunk_simple(hunk, *color_added, *color_removed), | ||
| DiffHighlighter::Syntect { syntax_set, theme } => { | ||
| highlight_hunk_syntect(hunk, syntax_set, theme, syntax.unwrap()) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| fn highlight_hunk_simple( | ||
| hunk: &Hunk, | ||
| color_added: crossterm::style::Color, | ||
| color_removed: crossterm::style::Color, | ||
| ) -> String { | ||
| let mut buf = String::new(); | ||
| let color_added = style::SetForegroundColor(color_added).to_string(); | ||
| let color_removed = style::SetForegroundColor(color_removed).to_string(); | ||
|
|
||
| let (header_marker, header_content) = hunk.header(); | ||
| buf.push_str(header_marker); | ||
| buf.push_str(header_content); | ||
| buf.push('\n'); | ||
|
|
||
| for (line_type, line_content) in hunk.lines() { | ||
| match line_type { | ||
| DiffLineType::Unchanged => { | ||
| buf.push(' '); | ||
| } | ||
| DiffLineType::Added => { | ||
| buf.push_str(&color_added); | ||
| buf.push('+'); | ||
| } | ||
| DiffLineType::Removed => { | ||
| buf.push_str(&color_removed); | ||
| buf.push('-'); | ||
| } | ||
| } | ||
| buf.push_str(line_content); | ||
| buf.push('\n'); | ||
| } | ||
|
|
||
| // workaround: remove trailing line break | ||
| buf.pop(); | ||
| buf.push_str("\x1b[0m"); | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Haven't investigated but my suspicion is that resetting attributes this way will break things if the user has a custom background color set. The |
||
| buf | ||
| } | ||
|
|
||
| fn highlight_hunk_syntect( | ||
| hunk: &Hunk, | ||
| syntax_set: &SyntaxSet, | ||
| theme: &Theme, | ||
| syntax: &SyntaxReference, | ||
| ) -> String { | ||
| // TODO: move somewhere else? | ||
| let marker_added = as_24_bit_terminal_escaped(&[(MARKER_ADDED_STYLE, MARKER_ADDED)], true); | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is unclear to me why we use |
||
| let marker_removed = | ||
| as_24_bit_terminal_escaped(&[(MARKER_REMOVED_STYLE, MARKER_REMOVED)], true); | ||
|
|
||
| // separate highlighters for added and removed lines to keep the syntax intact | ||
| let mut hl_add = HighlightLines::new(syntax, theme); | ||
| let mut hl_rem = HighlightLines::new(syntax, theme); | ||
|
|
||
| let mut buf = String::new(); | ||
|
|
||
| let (header_marker, header_content) = { | ||
| let header = hunk.header(); | ||
| let header_content = hl_add | ||
| .highlight_line(header.1, syntax_set) | ||
| .and_then(|_| hl_rem.highlight_line(header.1, syntax_set)); | ||
| ( | ||
| as_24_bit_terminal_escaped(&[(HEADER_MARKER_STYLE, header.0)], false), | ||
| header_content.map_or_else( | ||
| |_| header.1.to_owned(), | ||
| |content| as_24_bit_terminal_escaped(&content, false), | ||
| ), | ||
| ) | ||
| }; | ||
|
|
||
| buf.push_str(&header_marker); | ||
| buf.push_str(&header_content); | ||
| buf.push('\n'); | ||
|
|
||
| for (line_type, line_content) in hunk.lines() { | ||
| let ranges = match line_type { | ||
| DiffLineType::Unchanged => hl_add | ||
| .highlight_line(line_content, syntax_set) | ||
| .and_then(|_| hl_rem.highlight_line(line_content, syntax_set)), | ||
| DiffLineType::Added => hl_add.highlight_line(line_content, syntax_set), | ||
| DiffLineType::Removed => hl_rem.highlight_line(line_content, syntax_set), | ||
| }; | ||
|
|
||
| let Ok(mut ranges) = ranges else { | ||
| buf.push_str(line_content); | ||
| continue; | ||
| }; | ||
|
|
||
| let bg = match line_type { | ||
| DiffLineType::Unchanged => { | ||
| buf.push(' '); | ||
| false | ||
| } | ||
| DiffLineType::Added => { | ||
| buf.push_str(&marker_added); | ||
| for r in &mut ranges { | ||
| r.0.background = MARKER_ADDED_STYLE.background; | ||
| } | ||
| true | ||
| } | ||
| DiffLineType::Removed => { | ||
| buf.push_str(&marker_removed); | ||
| for r in &mut ranges { | ||
| r.0.background = MARKER_REMOVED_STYLE.background; | ||
| } | ||
| true | ||
| } | ||
| }; | ||
|
|
||
| let highlighted_content = as_24_bit_terminal_escaped(&ranges, bg); | ||
| buf.push_str(&highlighted_content); | ||
| buf.push('\n'); | ||
| } | ||
| // workaround: remove trailing line break | ||
| buf.pop(); | ||
|
|
||
| // according to docs of `as_24_bit_terminal_escaped` | ||
| buf.push_str("\x1b[0m"); | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See prior comment |
||
| buf | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| use std::ops::Range; | ||
|
|
||
| use anyhow::{anyhow, bail, Result}; | ||
| use line_span::{LineSpan, LineSpans}; | ||
|
|
||
| pub struct Hunk { | ||
| /// Raw hunk string. | ||
| /// | ||
| /// This may not be modified after the hunk was created, as the other fields index into it. | ||
| raw_hunk: String, | ||
| /// Header marker and content. | ||
| header: (Range<usize>, Range<usize>), | ||
| /// Ranges of `raw_hunk` for each line with the corresponding diff type. | ||
| /// | ||
| /// Doesn't include the starting character of the diff line (`+/-/ `) or the line break. | ||
| lines: Vec<HunkLine>, | ||
| } | ||
|
|
||
| impl Hunk { | ||
| pub fn from_string(raw_hunk: String) -> Result<Self> { | ||
| let mut lines_iter = raw_hunk.line_spans(); | ||
| let header = match lines_iter.next().map(Self::parse_header) { | ||
| Some(Ok(header)) => header, | ||
| Some(Err(e)) => return Err(e), | ||
| _ => bail!("Empty hunk."), | ||
| }; | ||
| let lines = lines_iter | ||
| .map(HunkLine::from_line_span) | ||
| .collect::<Result<Vec<_>, _>>()?; | ||
| Ok(Self { | ||
| raw_hunk, | ||
| header, | ||
| lines, | ||
| }) | ||
| } | ||
|
|
||
| pub fn raw(&self) -> &str { | ||
| &self.raw_hunk | ||
| } | ||
|
|
||
| pub fn header(&self) -> (&str, &str) { | ||
| ( | ||
| &self.raw_hunk[self.header.0.clone()], | ||
| &self.raw_hunk[self.header.1.clone()], | ||
| ) | ||
| } | ||
|
|
||
| pub fn lines(&self) -> impl Iterator<Item = (DiffLineType, &str)> { | ||
| self.lines | ||
| .iter() | ||
| .map(|line| (line.diff_type, &self.raw_hunk[line.range.clone()])) | ||
| } | ||
|
|
||
| fn parse_header(line: LineSpan) -> Result<(Range<usize>, Range<usize>)> { | ||
| // TODO(cptp) | ||
| let range = line.range(); | ||
| Ok((range.start..range.start, range)) | ||
| } | ||
| } | ||
|
|
||
| #[derive(Clone, Copy, Debug, PartialEq, Eq)] | ||
| pub enum DiffLineType { | ||
| Unchanged, | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bikeshed (not important): how do we feel about changing this variant to
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, I'm not familiar with the exact naming in git so I just chose something. I'll also use "deleted" instead of "removed" since I think I saw these used in other parts of gex.
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't pay it much thought before but come to think of it, git actually uses "old" and "new" where in gex we use "deletion" and "addition", so it's already inconsistent. To be honest old and new don't sound as obvious to me personally, could just be my bias. Anyway, this is far from an important point about the code so I will stop fixating on it 😅 |
||
| Added, | ||
| Removed, | ||
| } | ||
|
|
||
| impl TryFrom<char> for DiffLineType { | ||
| type Error = anyhow::Error; | ||
|
|
||
| fn try_from(value: char) -> Result<Self> { | ||
| match value { | ||
| ' ' => Ok(Self::Unchanged), | ||
| '+' => Ok(Self::Added), | ||
| '-' => Ok(Self::Removed), | ||
| c => Err(anyhow!("'{c}' is not a valid diff type character.")), | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the case of an added trailing newline this will cause a bug (see #62) |
||
| } | ||
| } | ||
| } | ||
|
|
||
| /// A single line (range) of a hunk. | ||
| struct HunkLine { | ||
| /// Type of the line. | ||
| diff_type: DiffLineType, | ||
| /// The range of the line in the original string (see [`Hunk`]). | ||
| /// | ||
| /// Doesn't include the first character of the original line (`+`/`-`/` `). | ||
| range: Range<usize>, | ||
| } | ||
|
|
||
| impl HunkLine { | ||
| fn from_line_span(line: LineSpan) -> Result<Self> { | ||
| let Some(first_char) = line.chars().next() else { | ||
| bail!(""); | ||
| }; | ||
| let line_range = line.range(); | ||
| let diff_type = DiffLineType::try_from(first_char) | ||
| // fallback to unchanged | ||
| // TODO(cptp): this shouldn't happen, panic instead? | ||
| .unwrap_or(DiffLineType::Unchanged); | ||
| let range = (line_range.start + first_char.len_utf8())..line_range.end; | ||
| Ok(Self { diff_type, range }) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally, I think the default syntax highlighting theme should come from the terminal's theme (just as the default gex colour theme does). This should make life a bit easier for users as some colour coherency is ensured out of the box.
I am not sure of the feasibility of this since Syntect only accepts colors as RGBA. This StackOverflow answer suggests that in at least xterm you can query the current terminal colors, but I haven't investigated how cross-platform this approach is, and I don't think it's supported in Crossterm.
This will be a decent amount of work to look into and is definitely not blocking for this PR - but my gut feeling is that as long as this isn't possible, then syntax highlighting should be disabled by default. Let me know your thoughts.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if this is a viable solution since, as you mentioned there doesn't seem to be a reliable way to get the terminal's colors.
I think a "better" way could be creating a custom syntect theme and then map the
Styles back to the 16 color ANSI sequenses and display them using crossterm. But I don't think I'll explore that option in this PR for now.Either way, I agree that gex should be usable out-of-the-box even on terminals that don't support true-color / 24 bit colors. So yeah, the highlighting should be disabled by default.