From e885d13d1618d638612e7ad83aa317476552b870 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Thu, 20 Oct 2011 16:40:02 +0300 Subject: [PATCH 1/5] Ticket #1652: autodetect line-endings * On opening file, detect line-endings used by sampling some initial content. * If they happen to be CR or CRLF, skip fast load path, and in edit_insert_file() convert such line endings to '\n'. * Save detected line ending type for editor. Signed-off-by: Paul Sokolovsky Signed-off-by: Slava Zanko --- src/editor/edit-impl.h | 2 +- src/editor/edit.c | 60 ++++++++++++++++++++++++++++++++++++++---- src/editor/editcmd.c | 12 +++++---- src/editor/editdraw.c | 17 +++++++++--- 4 files changed, 76 insertions(+), 15 deletions(-) diff --git a/src/editor/edit-impl.h b/src/editor/edit-impl.h index 14dea5900b..e46701b5df 100644 --- a/src/editor/edit-impl.h +++ b/src/editor/edit-impl.h @@ -221,7 +221,7 @@ gboolean edit_save_block (WEdit * edit, const char *filename, off_t start, off_t gboolean edit_save_block_cmd (WEdit * edit); gboolean edit_insert_file_cmd (WEdit * edit); -off_t edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath); +off_t edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath, LineBreaks lb_type); gboolean edit_load_back_cmd (WEdit * edit); gboolean edit_load_forward_cmd (WEdit * edit); void edit_block_process_cmd (WEdit * edit, int macro_number); diff --git a/src/editor/edit.c b/src/editor/edit.c index f3b86adb4a..5ead0caae6 100644 --- a/src/editor/edit.c +++ b/src/editor/edit.c @@ -114,6 +114,8 @@ const char VERTICAL_MAGIC[] = { '\1', '\1', '\1', '\1', '\n' }; #define space_width 1 +#define DETECT_LB_TYPE_BUFLEN BUF_MEDIUM + /*** file scope type declarations ****************************************************************/ /*** file scope variables ************************************************************************/ @@ -378,6 +380,37 @@ check_file_access (WEdit * edit, const vfs_path_t * filename_vpath, struct stat /* 
--------------------------------------------------------------------------------------------- */ +/** + * detect type of line breaks + * + */ +/* --------------------------------------------------------------------------------------------- */ + +static LineBreaks +detect_lb_type (const vfs_path_t *filename_vpath) +{ + char buf[BUF_MEDIUM]; + ssize_t file, sz; + + file = mc_open (filename_vpath, O_RDONLY | O_BINARY); + if (file == -1) + return LB_ASIS; + + sz = mc_read (file, buf, sizeof (buf) - 1); + mc_close (file); + + if (sz <= 0) + return LB_ASIS; + + buf[(size_t) sz] = '\0'; + if (strstr (buf, "\r\n") != NULL) + return LB_WIN; + if (strchr (buf, '\r') != NULL) + return LB_MAC; + return LB_ASIS; +} + +/* --------------------------------------------------------------------------------------------- */ /** * Open the file and load it into the buffers, either directly or using * a filter. Return TRUE on success, FALSE on error. @@ -394,6 +427,7 @@ static gboolean edit_load_file (WEdit * edit) { gboolean fast_load = TRUE; + LineBreaks lb_type = LB_ASIS; /* Cannot do fast load if a filter is used */ if (edit_find_filter (edit->filename_vpath) >= 0) @@ -418,6 +452,10 @@ edit_load_file (WEdit * edit) edit_clean (edit); return FALSE; } + lb_type = detect_lb_type (edit->filename_vpath); + + if (lb_type != LB_ASIS && lb_type != LB_UNIX) + fast_load = FALSE; } else { @@ -443,7 +481,7 @@ edit_load_file (WEdit * edit) && *(vfs_path_get_by_index (edit->filename_vpath, 0)->path) != '\0') { edit->undo_stack_disable = 1; - if (edit_insert_file (edit, edit->filename_vpath) < 0) + if (edit_insert_file (edit, edit->filename_vpath, lb_type) < 0) { edit_clean (edit); return FALSE; @@ -451,7 +489,7 @@ edit_load_file (WEdit * edit) edit->undo_stack_disable = 0; } } - edit->lb = LB_ASIS; + edit->lb = lb_type; return TRUE; } @@ -1783,7 +1821,7 @@ user_menu (WEdit * edit, const char *menu_file, int selected_entry) { off_t ins_len; - ins_len = edit_insert_file (edit, block_file_vpath); + 
ins_len = edit_insert_file (edit, block_file_vpath, LB_ASIS); if (!nomark && ins_len > 0) edit_set_markers (edit, start_mark, start_mark + ins_len, 0, 0); } @@ -1937,7 +1975,7 @@ is_break_char (char c) /** inserts a file at the cursor, returns count of inserted bytes on success */ off_t -edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath) +edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath, LineBreaks lb_type) { char *p; off_t current; @@ -2027,7 +2065,19 @@ edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath) while ((blocklen = mc_read (file, (char *) buf, TEMP_BUF_LEN)) > 0) { for (i = 0; i < blocklen; i++) - edit_insert (edit, buf[i]); + { + if (buf[i] == '\r') + { + if (lb_type == LB_MAC) + edit_insert (edit, '\n'); + else if (lb_type == LB_WIN) + /* just skip */ ; + else + edit_insert (edit, '\r'); + } + else + edit_insert (edit, buf[i]); + } } /* highlight inserted text then not persistent blocks */ if (!option_persistent_selections && edit->modified) diff --git a/src/editor/editcmd.c b/src/editor/editcmd.c index ea9f49f5aa..bc9e24629a 100644 --- a/src/editor/editcmd.c +++ b/src/editor/editcmd.c @@ -474,7 +474,9 @@ edit_get_save_file_as (WEdit * edit) { char *fname; - edit->lb = cur_lb; + /* Don't change current LB type (possibly autodetected), unless user asked to. 
*/ + if (cur_lb != LB_ASIS) + edit->lb = cur_lb; fname = tilde_expand (filename_res); g_free (filename_res); ret_vpath = vfs_path_from_str (fname); @@ -3013,7 +3015,7 @@ edit_paste_from_X_buf_cmd (WEdit * edit) /* try use external clipboard utility */ mc_event_raise (MCEVENT_GROUP_CORE, "clipboard_file_from_ext_clip", NULL); tmp = mc_config_get_full_vpath (EDIT_CLIP_FILE); - ret = (edit_insert_file (edit, tmp) >= 0); + ret = (edit_insert_file (edit, tmp, LB_ASIS) >= 0); vfs_path_free (tmp); return ret; @@ -3121,7 +3123,7 @@ edit_insert_file_cmd (WEdit * edit) vfs_path_t *exp_vpath; exp_vpath = vfs_path_from_str (exp); - ret = (edit_insert_file (edit, exp_vpath) >= 0); + ret = (edit_insert_file (edit, exp_vpath, LB_ASIS) >= 0); vfs_path_free (exp_vpath); if (!ret) @@ -3199,7 +3201,7 @@ edit_sort_cmd (WEdit * edit) vfs_path_t *tmp_vpath; tmp_vpath = mc_config_get_full_vpath (EDIT_TEMP_FILE); - edit_insert_file (edit, tmp_vpath); + edit_insert_file (edit, tmp_vpath, LB_ASIS); vfs_path_free (tmp_vpath); } return 0; @@ -3246,7 +3248,7 @@ edit_ext_cmd (WEdit * edit) vfs_path_t *tmp_vpath; tmp_vpath = mc_config_get_full_vpath (EDIT_TEMP_FILE); - edit_insert_file (edit, tmp_vpath); + edit_insert_file (edit, tmp_vpath, LB_ASIS); vfs_path_free (tmp_vpath); } return 0; diff --git a/src/editor/editdraw.c b/src/editor/editdraw.c index 0fd09c2dc1..44a1eb4ad5 100644 --- a/src/editor/editdraw.c +++ b/src/editor/editdraw.c @@ -108,6 +108,13 @@ status_string (WEdit * edit, char *s, int w) { char byte_str[16]; + static const char *lb_names[LB_NAMES] = { + "", + "LF", + "CRLF", + "CR" + }; + /* * If we are at the end of file, print , * otherwise print the current character as is (if printable), @@ -152,7 +159,7 @@ status_string (WEdit * edit, char *s, int w) /* The field lengths just prevent the status line from shortening too much */ if (simple_statusbar) g_snprintf (s, w, - "%c%c%c%c %3ld %5ld/%ld %6ld/%ld %s %s", + "%c%c%c%c %3ld %5ld/%ld %6ld/%ld %s %s %s", edit->mark1 != 
edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-', edit->modified ? 'M' : '-', macro_index < 0 ? '-' : 'R', @@ -164,10 +171,11 @@ status_string (WEdit * edit, char *s, int w) #ifdef HAVE_CHARSET mc_global.source_codepage >= 0 ? get_codepage_id (mc_global.source_codepage) : #endif - ""); + "", + lb_names[edit->lb]); else g_snprintf (s, w, - "[%c%c%c%c] %2ld L:[%3ld+%2ld %3ld/%3ld] *(%-4ld/%4ldb) %s %s", + "[%c%c%c%c] %2ld L:[%3ld+%2ld %3ld/%3ld] *(%-4ld/%4ldb) %s %s %s", edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-', edit->modified ? 'M' : '-', macro_index < 0 ? '-' : 'R', @@ -181,7 +189,8 @@ status_string (WEdit * edit, char *s, int w) #ifdef HAVE_CHARSET mc_global.source_codepage >= 0 ? get_codepage_id (mc_global.source_codepage) : #endif - ""); + "", + lb_names[edit->lb]); } /* --------------------------------------------------------------------------------------------- */ From 067742b35573708f386afe1e262db7788fb1abb4 Mon Sep 17 00:00:00 2001 From: Ilia Maslakov Date: Tue, 29 Mar 2011 13:29:31 +0000 Subject: [PATCH 2/5] Added configuration option editor_autodetect_linebreak. Allows to enable or disable line break type detection in the editor. Default value is off, based on concerns expressed regarding binary-safety of the editor. With this setting conservatively set to off, editor behavior stays exactly the same as before, unless user explicitly enables this option. 
Signed-off-by: Ilia Maslakov Signed-off-by: Paul Sokolovsky --- src/editor/edit.c | 4 +++- src/editor/edit.h | 1 + src/editor/editoptions.c | 2 ++ src/setup.c | 1 + 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/editor/edit.c b/src/editor/edit.c index 5ead0caae6..04f148b8f1 100644 --- a/src/editor/edit.c +++ b/src/editor/edit.c @@ -90,6 +90,7 @@ int option_line_state = 0; int option_line_state_width = 0; gboolean option_cursor_after_inserted_block = FALSE; int option_state_full_filename = 0; +int option_autodetect_lb = 0; int option_edit_right_extreme = 0; int option_edit_left_extreme = 0; @@ -452,7 +453,8 @@ edit_load_file (WEdit * edit) edit_clean (edit); return FALSE; } - lb_type = detect_lb_type (edit->filename_vpath); + if (option_autodetect_lb) + lb_type = detect_lb_type (edit->filename_vpath); if (lb_type != LB_ASIS && lb_type != LB_UNIX) fast_load = FALSE; diff --git a/src/editor/edit.h b/src/editor/edit.h index 1359891c4d..9e4f94d4d4 100644 --- a/src/editor/edit.h +++ b/src/editor/edit.h @@ -37,6 +37,7 @@ extern int option_auto_para_formatting; extern int option_fill_tabs_with_spaces; extern int option_return_does_auto_indent; extern int option_backspace_through_tabs; +extern int option_autodetect_lb; extern int option_fake_half_tabs; extern int option_persistent_selections; extern int option_drop_selection_on_copy; diff --git a/src/editor/editoptions.c b/src/editor/editoptions.c index b128d0049c..f3f970ec91 100644 --- a/src/editor/editoptions.c +++ b/src/editor/editoptions.c @@ -150,6 +150,7 @@ edit_options_dialog (WDialog * h) QUICK_STOP_GROUPBOX, QUICK_SEPARATOR (FALSE), QUICK_SEPARATOR (FALSE), + QUICK_SEPARATOR (FALSE), QUICK_START_GROUPBOX (N_("Tabulation")), QUICK_CHECKBOX (N_("&Fake half tabs"), &option_fake_half_tabs, NULL), QUICK_CHECKBOX (N_("&Backspace through tabs"), &option_backspace_through_tabs, @@ -176,6 +177,7 @@ edit_options_dialog (WDialog * h) QUICK_CHECKBOX (N_("&Group undo"), &option_group_undo, NULL), 
QUICK_LABELED_INPUT (N_("Word wrap line length:"), input_label_left, wrap_length, "edit-word-wrap", &p, NULL, FALSE, FALSE, INPUT_COMPLETE_NONE), + QUICK_CHECKBOX (N_("&Autodetect line breaks type"), &option_autodetect_lb, NULL), QUICK_STOP_GROUPBOX, QUICK_STOP_COLUMNS, QUICK_BUTTONS_OK_CANCEL, diff --git a/src/setup.c b/src/setup.c index 5cd32ab281..41522b5176 100644 --- a/src/setup.c +++ b/src/setup.c @@ -338,6 +338,7 @@ static const struct { "editor_word_wrap_line_length", &option_word_wrap_line_length }, { "editor_fill_tabs_with_spaces", &option_fill_tabs_with_spaces }, { "editor_return_does_auto_indent", &option_return_does_auto_indent }, + { "editor_autodetect_linebreak", &option_autodetect_lb }, { "editor_backspace_through_tabs", &option_backspace_through_tabs }, { "editor_fake_half_tabs", &option_fake_half_tabs }, { "editor_option_save_mode", &option_save_mode }, From 68cd8b04f88c42c7da36e6f528bd1e00dd803187 Mon Sep 17 00:00:00 2001 From: Slava Zanko Date: Thu, 20 Oct 2011 16:56:15 +0300 Subject: [PATCH 3/5] Added unit tests for checking "detect type of line breaks" functionality Signed-off-by: Slava Zanko --- tests/src/editor/Makefile.am | 11 +- tests/src/editor/common_editor_includes.c | 87 ++++++ tests/src/editor/detect_linebreaks.c | 311 ++++++++++++++++++++++ 3 files changed, 407 insertions(+), 2 deletions(-) create mode 100644 tests/src/editor/common_editor_includes.c create mode 100644 tests/src/editor/detect_linebreaks.c diff --git a/tests/src/editor/Makefile.am b/tests/src/editor/Makefile.am index 93e9f92650..2eec2ab184 100644 --- a/tests/src/editor/Makefile.am +++ b/tests/src/editor/Makefile.am @@ -4,6 +4,7 @@ AM_CPPFLAGS = \ -DTEST_SHARE_DIR=\"$(abs_srcdir)\" \ $(GLIB_CFLAGS) \ -I$(top_srcdir) \ + -I$(top_srcdir)/src/editor \ @CHECK_CFLAGS@ AM_LDFLAGS = @TESTS_LDFLAGS@ @@ -17,13 +18,19 @@ if ENABLE_VFS_SMB LIBS += $(top_builddir)/src/vfs/smbfs/helpers/libsamba.a endif -EXTRA_DIST = mc.charsets test-data.txt.in +EXTRA_DIST = mc.charsets 
test-data.txt.in \ + common_editor_includes.c + +CLEANFILES = detect_linebreaks.log TESTS = \ - editcmd__edit_complete_word_cmd + editcmd__edit_complete_word_cmd \ + detect_linebreaks check_PROGRAMS = $(TESTS) editcmd__edit_complete_word_cmd_SOURCES = \ editcmd__edit_complete_word_cmd.c +detect_linebreaks_SOURCES = \ + detect_linebreaks.c diff --git a/tests/src/editor/common_editor_includes.c b/tests/src/editor/common_editor_includes.c new file mode 100644 index 0000000000..a86eab7301 --- /dev/null +++ b/tests/src/editor/common_editor_includes.c @@ -0,0 +1,87 @@ +/* + src/editor - common include files for testing static functions + + Copyright (C) 2011 + The Free Software Foundation, Inc. + + Written by: + Slava Zanko , 2011 + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "edit-widget.h" +#include "lib/global.h" +#include "lib/keybind.h" + +/* ********************************************************************************************* */ +/* mock variables and functions */ + +int drop_menus = 0; +const global_keymap_t *editor_map = NULL; +const global_keymap_t *editor_x_map = NULL; +GArray *macros_list = NULL; +int option_tab_spacing = 8; +int macro_index = -1; + +static gboolean +do_select_codepage (void) +{ + return TRUE; +} +static gboolean +user_menu_cmd (struct WEdit *edit_widget, const char *menu_file, int selected_entry) +{ + (void) edit_widget; + (void) menu_file; + (void) selected_entry; + return TRUE; +} +static int +check_for_default (const char *default_file, const char *file) +{ + (void) default_file; + (void) file; + return 0; +} +static void +save_setup_cmd (void) +{ +} +static void +learn_keys (void) +{ +} +static void +view_other_cmd (void) +{ +} + +/* ********************************************************************************************* */ + +#include "bookmark.c" +#include "edit.c" +#include "editcmd.c" +#include "editwidget.c" +#include "editdraw.c" +#include "editkeys.c" +#include "editmenu.c" +#include "editoptions.c" +#include "syntax.c" +#include "wordproc.c" +#include "choosesyntax.c" +#include "etags.c" +#include "editcmd_dialogs.c" diff --git a/tests/src/editor/detect_linebreaks.c b/tests/src/editor/detect_linebreaks.c new file mode 100644 index 0000000000..371c1b3372 --- /dev/null +++ b/tests/src/editor/detect_linebreaks.c @@ -0,0 +1,311 @@ +/* + src/editor - check 'detect linebreaks' functionality + + Copyright (C) 2011 + The Free Software Foundation, Inc. + + Written by: + Slava Zanko , 2011 + + This file is part of the Midnight Commander. 
+ + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define TEST_SUITE_NAME "src/editor/detect_linebreaks" + +#include + +#include + +#include "common_editor_includes.c" /* for testing static functions*/ + +#include "src/vfs/local/local.h" + +const char *filename = "detect_linebreaks.in.txt"; +struct macro_action_t record_macro_buf[MAX_MACRO_LENGTH]; + +static void +setup (void) +{ + str_init_strings (NULL); + + vfs_init (); + init_localfs (); + vfs_setup_work_dir (); +} + +static void +teardown (void) +{ + vfs_shut (); +} + +/* --------------------------------------------------------------------------------------------- */ + +START_TEST (test_detect_lb_type) +{ + LineBreaks result; + /* prepare for test */ + int fd = open (filename, O_WRONLY|O_CREAT, 0644); + int i; + if (fd == -1) + { + fail("unable to create test input file %s",filename); + return; + } + for (i = 0;i<200;i++) + { + write (fd, "Test for detect line break\r\n", 29); + } + write (fd, "\r\n", 2); + close(fd); + + result = detect_lb_type ((char *) filename); + fail_unless(result == LB_WIN, "Incorrect lineBreak: result(%d) != LB_WIN(%d)",result, LB_WIN); + + unlink(filename); +} +END_TEST + +/* --------------------------------------------------------------------------------------------- */ + +START_TEST (test_detect_lb_type_very_long_string) +{ + LineBreaks result; + /* prepare for test */ + int fd = open (filename, O_WRONLY|O_CREAT, 
0644); + int i; + if (fd == -1) + { + fail("unable to create test input file %s",filename); + return; + } + for (i = 0; i<20 ; i++) + { + write (fd, "Very long string. ", 18); + } + write (fd, "\r\n", 2); + close(fd); + + result = detect_lb_type ((char *) filename); + fail_unless(result == LB_WIN, "Incorrect lineBreak: result(%d) != LB_WIN(%d)",result, LB_WIN); + + unlink(filename); +} +END_TEST + +/* --------------------------------------------------------------------------------------------- */ + +START_TEST (test_detect_lb_type_rrrrrn) +{ + LineBreaks result; + /* prepare for test */ + int fd = open (filename, O_WRONLY|O_CREAT, 0644); + if (fd == -1) + { + fail("unable to create test input file %s",filename); + return; + } + write (fd, "test\r\r\r\r\r\n", 10); + close(fd); + + result = detect_lb_type ((char *) filename); + fail_unless(result == LB_MAC, "Incorrect lineBreak: result(%d) != LB_MAC(%d)",result, LB_MAC); + + unlink(filename); +} +END_TEST + +/* --------------------------------------------------------------------------------------------- */ + +START_TEST (test_detect_lb_type_nnnnnr) +{ + LineBreaks result; + /* prepare for test */ + int fd = open (filename, O_WRONLY|O_CREAT, 0644); + if (fd == -1) + { + fail("unable to create test input file %s",filename); + return; + } + write (fd, "test\n\n\n\n\n\r", 10); + close(fd); + + result = detect_lb_type ((char *) filename); + fail_unless(result == LB_ASIS, "Incorrect lineBreak: result(%d) != LB_ASIS(%d)",result, LB_ASIS); + + unlink(filename); +} +END_TEST + +/* --------------------------------------------------------------------------------------------- */ + +START_TEST (test_detect_lb_type_rrrrrr) +{ + LineBreaks result; + /* prepare for test */ + int fd = open (filename, O_WRONLY|O_CREAT, 0644); + if (fd == -1) + { + fail("unable to create test input file %s",filename); + return; + } + write (fd, "test\r\r\r\r\r\r", 10); + close(fd); + + result = detect_lb_type ((char *) filename); + fail_unless(result 
== LB_MAC, "Incorrect lineBreak: result(%d) != LB_MAC(%d)",result, LB_MAC); + + unlink(filename); +} +END_TEST + +/* --------------------------------------------------------------------------------------------- */ + +START_TEST (test_detect_lb_type_nnnnnn) +{ + LineBreaks result; + /* prepare for test */ + int fd = open (filename, O_WRONLY|O_CREAT, 0644); + if (fd == -1) + { + fail("unable to create test input file %s",filename); + return; + } + write (fd, "test\n\n\n\n\n\n", 10); + close(fd); + + result = detect_lb_type ((char *) filename); + fail_unless(result == LB_ASIS, "Incorrect lineBreak: result(%d) != LB_ASIS(%d)",result, LB_ASIS); + + unlink(filename); +} +END_TEST + +/* --------------------------------------------------------------------------------------------- */ + +START_TEST (test_detect_lb_type_buffer_border) +{ + LineBreaks result; + char buf[DETECT_LB_TYPE_BUFLEN]; + /* prepare for test */ + int fd = open (filename, O_WRONLY|O_CREAT, 0644); + if (fd == -1) + { + fail("unable to create test input file %s",filename); + return; + } + memset(buf, ' ', DETECT_LB_TYPE_BUFLEN); + buf[DETECT_LB_TYPE_BUFLEN - 1] = '\r'; + + write (fd, buf, DETECT_LB_TYPE_BUFLEN); + write (fd, "\n", 1); + close(fd); + + result = detect_lb_type ((char *) filename); + fail_unless(result == LB_WIN, "Incorrect lineBreak: result(%d) != LB_WIN(%d)",result, LB_WIN); + + unlink(filename); +} +END_TEST + +/* --------------------------------------------------------------------------------------------- */ + +START_TEST (test_detect_lb_type_buffer_border_overflow) +{ + LineBreaks result; + char buf[DETECT_LB_TYPE_BUFLEN]; + /* prepare for test */ + int fd = open (filename, O_WRONLY|O_CREAT, 0644); + if (fd == -1) + { + fail("unable to create test input file %s",filename); + return; + } + memset(buf, ' ', DETECT_LB_TYPE_BUFLEN); + buf[DETECT_LB_TYPE_BUFLEN - 1] = '\r'; + + write (fd, buf, DETECT_LB_TYPE_BUFLEN); + write (fd, "bla-bla\r\n", 9); + close(fd); + + result = detect_lb_type 
((char *) filename); + fail_unless(result == LB_MAC, "Incorrect lineBreak: result(%d) != LB_MAC(%d)",result, LB_MAC); + + unlink(filename); +} +END_TEST + +/* --------------------------------------------------------------------------------------------- */ + +START_TEST (test_detect_lb_type_buffer_border_more) +{ + LineBreaks result; + char buf[DETECT_LB_TYPE_BUFLEN]; + /* prepare for test */ + int fd = open (filename, O_WRONLY|O_CREAT, 0644); + if (fd == -1) + { + fail("unable to create test input file %s",filename); + return; + } + memset(buf, ' ', DETECT_LB_TYPE_BUFLEN); + + write (fd, buf, DETECT_LB_TYPE_BUFLEN); + write (fd, "bla-bla\n", 8); + close(fd); + + result = detect_lb_type ((char *) filename); + fail_unless(result == LB_ASIS, "Incorrect lineBreak: result(%d) != LB_ASIS(%d)",result, LB_ASIS); + + unlink(filename); +} +END_TEST + +/* --------------------------------------------------------------------------------------------- */ + +int +main (void) +{ + int number_failed; + + Suite *s = suite_create (TEST_SUITE_NAME); + TCase *tc_core = tcase_create ("Core"); + SRunner *sr; + + tcase_add_checked_fixture (tc_core, setup, teardown); + + /* Add new tests here: *************** */ + tcase_add_test (tc_core, test_detect_lb_type); + tcase_add_test (tc_core, test_detect_lb_type_very_long_string); + tcase_add_test (tc_core, test_detect_lb_type_rrrrrn); + tcase_add_test (tc_core, test_detect_lb_type_nnnnnr); + tcase_add_test (tc_core, test_detect_lb_type_rrrrrr); + tcase_add_test (tc_core, test_detect_lb_type_nnnnnn); + tcase_add_test (tc_core, test_detect_lb_type_buffer_border); + tcase_add_test (tc_core, test_detect_lb_type_buffer_border_overflow); + tcase_add_test (tc_core, test_detect_lb_type_buffer_border_more); + /* *********************************** */ + + suite_add_tcase (s, tc_core); + sr = srunner_create (s); + srunner_set_log (sr, "detect_linebreaks.log"); + srunner_run_all (sr, CK_NORMAL); + number_failed = srunner_ntests_failed (sr); + srunner_free 
(sr); + return (number_failed == 0) ? 0 : 1; +} + +/* --------------------------------------------------------------------------------------------- */ From e55490666130682fd04f59383bcac4fdf86011fa Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 6 Dec 2014 04:16:04 +0200 Subject: [PATCH 4/5] edit.c: Make line break detection much more conservative. Detection now requires consistent line breaks being present within the sampling buffer. If breaks of different types are found, or there's the slightest suspicion that it may be binary content, it reverts to binary-safe "as-is" mode. Also, refactor detection routines to facilitate unit testing. Signed-off-by: Paul Sokolovsky --- src/editor/edit.c | 64 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/src/editor/edit.c b/src/editor/edit.c index 04f148b8f1..27d2576e74 100644 --- a/src/editor/edit.c +++ b/src/editor/edit.c @@ -387,10 +387,62 @@ check_file_access (WEdit * edit, const vfs_path_t * filename_vpath, struct stat */ /* --------------------------------------------------------------------------------------------- */ +static LineBreaks +detect_lb_type_buf (unsigned char *p, ssize_t sz) +{ + LineBreaks detected_lb = LB_ASIS; + + /* If there was error or file too short, give up */ + if (sz <= 2) + return LB_ASIS; + + p[(size_t) sz] = '\0'; + /* Avoid ambiguity of our buffer breaking CR LF sequence */ + if (p[sz - 1] == '\r') { + p[--sz] = '\0'; + } + + for (; sz--; p++) { + LineBreaks new_lb = LB_ASIS; + if (*p == '\r') { + if (p[1] == '\n') { + sz--; p++; + new_lb = LB_WIN; + } else { + new_lb = LB_MAC; + } + } else if (*p == '\n') { + /* LF CR is anomaly for text file, give up */ + if (p[1] == '\r') + return LB_ASIS; + new_lb = LB_UNIX; + } else if (*p < 0x20 && *p != '\t' && *p != '\f') { + /* The only common special char in text files is tab, much + less commonly - form feed. Anything else - give up. 
*/ + return LB_ASIS; + } + + /* If we detected a new lb, and it doesn't match previously + detected, give up */ + if (new_lb != LB_ASIS) { + if (detected_lb != LB_ASIS && detected_lb != new_lb) { + return LB_ASIS; + } + detected_lb = new_lb; + } + } + + /* LB_UNIX means that within buffer, we saw only LF breaks, but + we cannot be sure about entire file. So, go conservative route + and don't report to user in UI that this file has unix line + breaks. */ + return detected_lb == LB_UNIX ? LB_ASIS : detected_lb; +} + static LineBreaks detect_lb_type (const vfs_path_t *filename_vpath) { - char buf[BUF_MEDIUM]; + unsigned char buf[BUF_LARGE]; ssize_t file, sz; file = mc_open (filename_vpath, O_RDONLY | O_BINARY); @@ -400,15 +452,7 @@ detect_lb_type (const vfs_path_t *filename_vpath) sz = mc_read (file, buf, sizeof (buf) - 1); mc_close (file); - if (sz <= 0) - return LB_ASIS; - - buf[(size_t) sz] = '\0'; - if (strstr (buf, "\r\n") != NULL) - return LB_WIN; - if (strchr (buf, '\r') != NULL) - return LB_MAC; - return LB_ASIS; + return detect_lb_type_buf (buf, sz); } /* --------------------------------------------------------------------------------------------- */ From 6d496be799646dc5751fcb68a09acbe0b73e1f68 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 6 Dec 2014 04:27:31 +0200 Subject: [PATCH 5/5] tests: detect_linebreaks.c: Revamp for latest codebase. Also, test only line break detection algorithm, not anything else (no file operations, etc.). Signed-off-by: Paul Sokolovsky --- tests/src/editor/common_editor_includes.c | 11 +- tests/src/editor/detect_linebreaks.c | 169 ++++++---------------- 2 files changed, 48 insertions(+), 132 deletions(-) diff --git a/tests/src/editor/common_editor_includes.c b/tests/src/editor/common_editor_includes.c index a86eab7301..bc1e572300 100644 --- a/tests/src/editor/common_editor_includes.c +++ b/tests/src/editor/common_editor_includes.c @@ -23,7 +23,7 @@ along with this program. If not, see . 
*/ -#include "edit-widget.h" +#include "editwidget.h" #include "lib/global.h" #include "lib/keybind.h" @@ -50,13 +50,6 @@ user_menu_cmd (struct WEdit *edit_widget, const char *menu_file, int selected_en (void) selected_entry; return TRUE; } -static int -check_for_default (const char *default_file, const char *file) -{ - (void) default_file; - (void) file; - return 0; -} static void save_setup_cmd (void) { @@ -77,11 +70,9 @@ view_other_cmd (void) #include "editcmd.c" #include "editwidget.c" #include "editdraw.c" -#include "editkeys.c" #include "editmenu.c" #include "editoptions.c" #include "syntax.c" -#include "wordproc.c" #include "choosesyntax.c" #include "etags.c" #include "editcmd_dialogs.c" diff --git a/tests/src/editor/detect_linebreaks.c b/tests/src/editor/detect_linebreaks.c index 371c1b3372..57e2be13ea 100644 --- a/tests/src/editor/detect_linebreaks.c +++ b/tests/src/editor/detect_linebreaks.c @@ -25,6 +25,10 @@ #define TEST_SUITE_NAME "src/editor/detect_linebreaks" +#include +#include +#include + #include #include @@ -57,22 +61,9 @@ teardown (void) START_TEST (test_detect_lb_type) { LineBreaks result; - /* prepare for test */ - int fd = open (filename, O_WRONLY|O_CREAT, 0644); - int i; - if (fd == -1) - { - fail("unable to create test input file %s",filename); - return; - } - for (i = 0;i<200;i++) - { - write (fd, "Test for detect line break\r\n", 29); - } - write (fd, "\r\n", 2); - close(fd); + char buf[] = "Test for detect line break\r\n"; - result = detect_lb_type ((char *) filename); + result = detect_lb_type_buf ((unsigned char *) buf, strlen(buf)); fail_unless(result == LB_WIN, "Incorrect lineBreak: result(%d) != LB_WIN(%d)",result, LB_WIN); unlink(filename); @@ -84,25 +75,16 @@ END_TEST START_TEST (test_detect_lb_type_very_long_string) { LineBreaks result; - /* prepare for test */ - int fd = open (filename, O_WRONLY|O_CREAT, 0644); + char buf[1024] = ""; int i; - if (fd == -1) - { - fail("unable to create test input file %s",filename); - return; - } 
for (i = 0; i<20 ; i++) { - write (fd, "Very long string. ", 18); + strcat (buf, "Very long string. "); } - write (fd, "\r\n", 2); - close(fd); + strcat (buf, "\r\n"); - result = detect_lb_type ((char *) filename); + result = detect_lb_type_buf ((unsigned char *) buf, strlen(buf)); fail_unless(result == LB_WIN, "Incorrect lineBreak: result(%d) != LB_WIN(%d)",result, LB_WIN); - - unlink(filename); } END_TEST @@ -111,20 +93,11 @@ END_TEST START_TEST (test_detect_lb_type_rrrrrn) { LineBreaks result; - /* prepare for test */ - int fd = open (filename, O_WRONLY|O_CREAT, 0644); - if (fd == -1) - { - fail("unable to create test input file %s",filename); - return; - } - write (fd, "test\r\r\r\r\r\n", 10); - close(fd); + char buf[1024] = ""; + strcat (buf, "test\r\r\r\r\r\n"); - result = detect_lb_type ((char *) filename); - fail_unless(result == LB_MAC, "Incorrect lineBreak: result(%d) != LB_MAC(%d)",result, LB_MAC); - - unlink(filename); + result = detect_lb_type_buf ((unsigned char *) buf, strlen(buf)); + fail_unless(result == LB_ASIS, "Incorrect lineBreak: result(%d) != LB_ASIS(%d)", result, LB_ASIS); } END_TEST @@ -133,20 +106,22 @@ END_TEST START_TEST (test_detect_lb_type_nnnnnr) { LineBreaks result; - /* prepare for test */ - int fd = open (filename, O_WRONLY|O_CREAT, 0644); - if (fd == -1) - { - fail("unable to create test input file %s",filename); - return; - } - write (fd, "test\n\n\n\n\n\r", 10); - close(fd); + char buf[] = "test\n\n\n\n\n\r "; - result = detect_lb_type ((char *) filename); + result = detect_lb_type_buf ((unsigned char *) buf, strlen(buf)); fail_unless(result == LB_ASIS, "Incorrect lineBreak: result(%d) != LB_ASIS(%d)",result, LB_ASIS); +} +END_TEST - unlink(filename); +/* --------------------------------------------------------------------------------------------- */ + +START_TEST (test_detect_lb_type_nnnrnrnnnn) +{ + LineBreaks result; + char buf[] = "test\n\n\n\r\n\r\n\r\n\n\n\n"; + + result = detect_lb_type_buf ((unsigned char *) buf, 
strlen(buf)); + fail_unless(result == LB_ASIS, "Incorrect lineBreak: result(%d) != LB_ASIS(%d)",result, LB_ASIS); } END_TEST @@ -155,20 +130,10 @@ END_TEST START_TEST (test_detect_lb_type_rrrrrr) { LineBreaks result; - /* prepare for test */ - int fd = open (filename, O_WRONLY|O_CREAT, 0644); - if (fd == -1) - { - fail("unable to create test input file %s",filename); - return; - } - write (fd, "test\r\r\r\r\r\r", 10); - close(fd); + char buf[1024] = "test\r\r\r\r\r\r"; - result = detect_lb_type ((char *) filename); - fail_unless(result == LB_MAC, "Incorrect lineBreak: result(%d) != LB_MAC(%d)",result, LB_MAC); - - unlink(filename); + result = detect_lb_type_buf ((unsigned char *) buf, strlen(buf)); + fail_unless(result == LB_MAC, "Incorrect lineBreak: result(%d) != LB_MAC(%d)", result, LB_MAC); } END_TEST @@ -177,20 +142,10 @@ END_TEST START_TEST (test_detect_lb_type_nnnnnn) { LineBreaks result; - /* prepare for test */ - int fd = open (filename, O_WRONLY|O_CREAT, 0644); - if (fd == -1) - { - fail("unable to create test input file %s",filename); - return; - } - write (fd, "test\n\n\n\n\n\n", 10); - close(fd); - - result = detect_lb_type ((char *) filename); - fail_unless(result == LB_ASIS, "Incorrect lineBreak: result(%d) != LB_ASIS(%d)",result, LB_ASIS); + char buf[1024] = "test\n\n\n\n\n\n"; - unlink(filename); + result = detect_lb_type_buf ((unsigned char *) buf, strlen(buf)); + fail_unless(result == LB_ASIS, "Incorrect lineBreak: result(%d) != LB_ASIS(%d)", result, LB_ASIS); } END_TEST @@ -199,25 +154,15 @@ END_TEST START_TEST (test_detect_lb_type_buffer_border) { LineBreaks result; - char buf[DETECT_LB_TYPE_BUFLEN]; - /* prepare for test */ - int fd = open (filename, O_WRONLY|O_CREAT, 0644); - if (fd == -1) - { - fail("unable to create test input file %s",filename); - return; - } + char buf[DETECT_LB_TYPE_BUFLEN + 100]; memset(buf, ' ', DETECT_LB_TYPE_BUFLEN); + buf[DETECT_LB_TYPE_BUFLEN - 101] = '\r'; + buf[DETECT_LB_TYPE_BUFLEN - 100] = '\n'; 
buf[DETECT_LB_TYPE_BUFLEN - 1] = '\r'; + buf[DETECT_LB_TYPE_BUFLEN] = '\n'; - write (fd, buf, DETECT_LB_TYPE_BUFLEN); - write (fd, "\n", 1); - close(fd); - - result = detect_lb_type ((char *) filename); - fail_unless(result == LB_WIN, "Incorrect lineBreak: result(%d) != LB_WIN(%d)",result, LB_WIN); - - unlink(filename); + result = detect_lb_type_buf ((unsigned char *) buf, DETECT_LB_TYPE_BUFLEN); + fail_unless(result == LB_WIN, "Incorrect lineBreak: result(%d) != LB_WIN(%d)", result, LB_WIN); } END_TEST @@ -226,25 +171,15 @@ END_TEST START_TEST (test_detect_lb_type_buffer_border_overflow) { LineBreaks result; - char buf[DETECT_LB_TYPE_BUFLEN]; - /* prepare for test */ - int fd = open (filename, O_WRONLY|O_CREAT, 0644); - if (fd == -1) - { - fail("unable to create test input file %s",filename); - return; - } + char buf[DETECT_LB_TYPE_BUFLEN + 100] = ""; /* zero-filled: strcat() below needs a NUL right after the memset() padding */ memset(buf, ' ', DETECT_LB_TYPE_BUFLEN); + buf[DETECT_LB_TYPE_BUFLEN - 100] = '\r'; buf[DETECT_LB_TYPE_BUFLEN - 1] = '\r'; - write (fd, buf, DETECT_LB_TYPE_BUFLEN); - write (fd, "bla-bla\r\n", 9); - close(fd); + strcat (buf, "bla-bla\r\n"); - result = detect_lb_type ((char *) filename); + result = detect_lb_type_buf ((unsigned char *) buf, DETECT_LB_TYPE_BUFLEN); fail_unless(result == LB_MAC, "Incorrect lineBreak: result(%d) != LB_MAC(%d)",result, LB_MAC); - - unlink(filename); } END_TEST @@ -253,24 +188,13 @@ END_TEST START_TEST (test_detect_lb_type_buffer_border_more) { LineBreaks result; - char buf[DETECT_LB_TYPE_BUFLEN]; - /* prepare for test */ - int fd = open (filename, O_WRONLY|O_CREAT, 0644); - if (fd == -1) - { - fail("unable to create test input file %s",filename); - return; - } + char buf[DETECT_LB_TYPE_BUFLEN + 100] = ""; /* zero-filled: strcat() and strlen() below need a NUL right after the memset() padding */ memset(buf, ' ', DETECT_LB_TYPE_BUFLEN); - write (fd, buf, DETECT_LB_TYPE_BUFLEN); - write (fd, "bla-bla\n", 8); - close(fd); + strcat (buf, "bla-bla\n"); - result = detect_lb_type ((char *) filename); + result = detect_lb_type_buf ((unsigned char *) buf, strlen(buf)); fail_unless(result ==
LB_ASIS, "Incorrect lineBreak: result(%d) != LB_ASIS(%d)",result, LB_ASIS); - - unlink(filename); } END_TEST @@ -292,6 +216,7 @@ main (void) tcase_add_test (tc_core, test_detect_lb_type_very_long_string); tcase_add_test (tc_core, test_detect_lb_type_rrrrrn); tcase_add_test (tc_core, test_detect_lb_type_nnnnnr); + tcase_add_test (tc_core, test_detect_lb_type_nnnrnrnnnn); tcase_add_test (tc_core, test_detect_lb_type_rrrrrr); tcase_add_test (tc_core, test_detect_lb_type_nnnnnn); tcase_add_test (tc_core, test_detect_lb_type_buffer_border);