From befe7e3d2e9c8d28788e2506e0e682aa3ca6a684 Mon Sep 17 00:00:00 2001 From: keymone Date: Sun, 7 Feb 2016 15:46:32 +0100 Subject: [PATCH 01/32] RegexReader for #" syntax RegexReader transforms #"foo"iL into (re2 "foo" {:ignore_case :literal}) --- pixie/vm/reader.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/pixie/vm/reader.py b/pixie/vm/reader.py index 09e4fdf9..1668f536 100644 --- a/pixie/vm/reader.py +++ b/pixie/vm/reader.py @@ -620,10 +620,41 @@ def invoke(self, rdr, ch): read_inner(rdr, True, always_return_form=True) return rdr +class RegexReader(ReaderHandler): + def invoke(self, rdr, ch): + regex_str = LiteralStringReader().invoke(rdr, ch) + regex_opts = EMPTY_SET + opts_dict = { + u"a": keyword(u"ascii"), + u"p": keyword(u"posix"), + u"l": keyword(u"longest_match"), + u"s": keyword(u"silent"), + u"L": keyword(u"literal"), + u"n": keyword(u"never_nl"), + u"m": keyword(u"dot_nl"), + u"c": keyword(u"never_capture"), + u"i": keyword(u"ignore_case") + } + + # read options (https://github.com/google/re2/blob/master/re2/re2.h#L517) + while True: + try: + opt = opts_dict.get(rdr.read(), None) + if opt is None: + rdr.unread() + break + else: + regex_opts = regex_opts.conj(opt) + except EOFError: + break + + return rt.cons(symbol(u"re2"), rt.cons(regex_str, rt.cons(regex_opts, nil))) + dispatch_handlers = { u"{": SetReader(), u"(": FnReader(), - u"_": CommentReader() + u"_": CommentReader(), + u"\"": RegexReader() } class DispatchReader(ReaderHandler): From eb56a1620b9780eb01b8466eab07b260ae837b92 Mon Sep 17 00:00:00 2001 From: keymone Date: Tue, 9 Feb 2016 13:30:29 +0100 Subject: [PATCH 02/32] fetch re2 and cre2 --- Makefile | 23 +++++++++++++++++++---- pixie/regex.pxi | 25 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 pixie/regex.pxi diff --git a/Makefile b/Makefile index 15f691e5..67777739 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,7 @@ compile_basics: 
build: fetch_externals $(PYTHON) $(EXTERNALS)/pypy/rpython/bin/rpython $(COMMON_BUILD_OPTS) $(JIT_OPTS) $(TARGET_OPTS) -fetch_externals: $(EXTERNALS)/pypy externals.fetched +fetch_externals: $(EXTERNALS)/pypy $(EXTERNALS)/cre2 externals.fetched externals.fetched: echo https://github.com/pixie-lang/external-deps/releases/download/1.0/`uname -s`-`uname -m`.tar.bz2 @@ -49,15 +49,30 @@ externals.fetched: tar -jxf /tmp/externals.tar.bz2 --strip-components=2 touch externals.fetched - -$(EXTERNALS)/pypy: - mkdir $(EXTERNALS); \ +$(EXTERNALS)/pypy: $(EXTERNALS) cd $(EXTERNALS); \ curl https://bitbucket.org/pypy/pypy/get/81254.tar.bz2 > pypy.tar.bz2; \ mkdir pypy; \ cd pypy; \ tar -jxf ../pypy.tar.bz2 --strip-components=1 +$(EXTERNALS)/re2: $(EXTERNALS) + cd $(EXTERNALS) && \ + curl -sL https://github.com/google/re2/archive/2016-02-01.tar.gz > re2.tar.gz && \ + mkdir re2 && \ + cd re2 && \ + tar -jxf ../re2.tar.gz --strip-components=1 + +$(EXTERNALS)/cre2: $(EXTERNALS)/re2 + cd $(EXTERNALS) && \ + curl -sL https://github.com/marcomaggi/cre2/archive/0.1b6.tar.gz > cre2.tar.gz && \ + mkdir cre2 && \ + cd cre2 && \ + tar -jxf ../cre2.tar.gz --strip-components=1 + +$(EXTERNALS): + mkdir $(EXTERNALS) + run: ./pixie-vm diff --git a/pixie/regex.pxi b/pixie/regex.pxi new file mode 100644 index 00000000..930cb397 --- /dev/null +++ b/pixie/regex.pxi @@ -0,0 +1,25 @@ +(ns pixie.regex + (:require [pixie.ffi-infer :as i])) + +(i/with-config {:library "re2" + :cxx-flags ["-lre2"] + :includes ["re2.h"]} + (i/defconst M_E) + (i/defconst M_LOG2E) + (i/defconst M_LOG10E) + (i/defconst M_LN2) + (i/defconst M_LN10) + (i/defconst M_PI) + (i/defconst M_PI_2) + (i/defconst M_PI_4) + (i/defconst M_1_PI) + (i/defconst M_2_PI) + (i/defconst M_2_SQRTPI) + (i/defconst M_SQRT2) + (i/defconst M_SQRT1_2) + + (i/defcfn nan) + (i/defcfn ceil) + (i/defcfn floor) + (i/defcfn nearbyint) + (i/defcfn rint) \ No newline at end of file From 86c23bd455b8ac630540ed236f7d2c19c22eee9f Mon Sep 17 00:00:00 2001 From: 
keymone Date: Sat, 13 Feb 2016 14:23:11 +0100 Subject: [PATCH 03/32] build re2 and cre2 --- Makefile | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 67777739..b22afa0d 100644 --- a/Makefile +++ b/Makefile @@ -38,10 +38,10 @@ compile_basics: @echo -e "\n\n\n\nWARNING: Compiling core libs. If you want to modify one of these files delete the .pxic files first\n\n\n\n" ./pixie-vm -c pixie/uv.pxi -c pixie/io.pxi -c pixie/stacklets.pxi -c pixie/stdlib.pxi -c pixie/repl.pxi -build: fetch_externals +build: fetch_externals re2_cre2 $(PYTHON) $(EXTERNALS)/pypy/rpython/bin/rpython $(COMMON_BUILD_OPTS) $(JIT_OPTS) $(TARGET_OPTS) -fetch_externals: $(EXTERNALS)/pypy $(EXTERNALS)/cre2 externals.fetched +fetch_externals: $(EXTERNALS)/pypy externals.fetched externals.fetched: echo https://github.com/pixie-lang/external-deps/releases/download/1.0/`uname -s`-`uname -m`.tar.bz2 @@ -49,6 +49,9 @@ externals.fetched: tar -jxf /tmp/externals.tar.bz2 --strip-components=2 touch externals.fetched +$(EXTERNALS): + mkdir $(EXTERNALS) + $(EXTERNALS)/pypy: $(EXTERNALS) cd $(EXTERNALS); \ curl https://bitbucket.org/pypy/pypy/get/81254.tar.bz2 > pypy.tar.bz2; \ @@ -59,19 +62,33 @@ $(EXTERNALS)/pypy: $(EXTERNALS) $(EXTERNALS)/re2: $(EXTERNALS) cd $(EXTERNALS) && \ curl -sL https://github.com/google/re2/archive/2016-02-01.tar.gz > re2.tar.gz && \ - mkdir re2 && \ + mkdir -p re2 && \ cd re2 && \ tar -jxf ../re2.tar.gz --strip-components=1 -$(EXTERNALS)/cre2: $(EXTERNALS)/re2 +$(EXTERNALS)/re2/obj/libre2.a: $(EXTERNALS)/re2 + cd $(EXTERNALS)/re2 && make + +$(EXTERNALS)/cre2: cd $(EXTERNALS) && \ - curl -sL https://github.com/marcomaggi/cre2/archive/0.1b6.tar.gz > cre2.tar.gz && \ - mkdir cre2 && \ + curl -sL https://github.com/keymone/cre2/archive/f1157647f9ca3ef11fd6447433f36e7c7bd64d09.tar.gz > cre2.tar.xz && \ + mkdir -p cre2 && \ cd cre2 && \ - tar -jxf ../cre2.tar.gz --strip-components=1 + tar -jxf ../cre2.tar.xz 
--strip-components=1 -$(EXTERNALS): - mkdir $(EXTERNALS) +$(EXTERNALS)/cre2/build/.libs/libcre2.a: $(EXTERNALS)/cre2 + cd $(EXTERNALS)/cre2 && \ + LIBTOOLIZE=`env which -a libtoolize glibtoolize | head -n1` sh autogen.sh && \ + mkdir -p build && \ + cd build && \ + ../configure --enable-maintainer-mode LDFLAGS="-L`pwd`/../../re2/obj" CPPFLAGS="-I`pwd`/../../re2" && \ + make + +re2: $(EXTERNALS)/re2/obj/libre2.a + +cre2: $(EXTERNALS)/cre2/build/.libs/libcre2.a + +re2_cre2: re2 cre2 run: ./pixie-vm From a0615e2e445ea4dabf96763015c002bed6766da1 Mon Sep 17 00:00:00 2001 From: keymone Date: Sat, 13 Feb 2016 16:58:46 +0100 Subject: [PATCH 04/32] parse regexp syntax into regexp call instead of re2, define regexp in stdlib --- pixie/stdlib.pxi | 7 +++++++ pixie/vm/reader.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi index 46f656a1..706abcf9 100644 --- a/pixie/stdlib.pxi +++ b/pixie/stdlib.pxi @@ -3072,3 +3072,10 @@ ex: (vary-meta x assoc :foo 42)" (swap! cache assoc argsv ret) ret) val))))) + +(defn regexp + {:doc "Returns internal representation for regular + expression, used in matching functions." 
+ :signatures [[rexegp-str opts]]} + [regexp-str opts] + (println (str regexp-str " " opts))) diff --git a/pixie/vm/reader.py b/pixie/vm/reader.py index 1668f536..d05998d4 100644 --- a/pixie/vm/reader.py +++ b/pixie/vm/reader.py @@ -648,7 +648,7 @@ def invoke(self, rdr, ch): except EOFError: break - return rt.cons(symbol(u"re2"), rt.cons(regex_str, rt.cons(regex_opts, nil))) + return rt.cons(symbol(u"regexp"), rt.cons(regex_str, rt.cons(regex_opts, nil))) dispatch_handlers = { u"{": SetReader(), From ab35e3faba296289ab6118a2a3381b7177d94e9f Mon Sep 17 00:00:00 2001 From: keymone Date: Sat, 13 Feb 2016 19:08:16 +0100 Subject: [PATCH 05/32] strip regex.pxi --- pixie/regex.pxi | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/pixie/regex.pxi b/pixie/regex.pxi index 930cb397..1bc35435 100644 --- a/pixie/regex.pxi +++ b/pixie/regex.pxi @@ -1,25 +1,12 @@ (ns pixie.regex (:require [pixie.ffi-infer :as i])) -(i/with-config {:library "re2" - :cxx-flags ["-lre2"] - :includes ["re2.h"]} +(i/with-config {:library "cre2" + :cxx-flags [" && pwd" + "-Lexternals/cre2/build/.libs" + "-lcre2" + "-Iexternals/cre2/src"] + :includes ["cre2.h"]} (i/defconst M_E) - (i/defconst M_LOG2E) - (i/defconst M_LOG10E) - (i/defconst M_LN2) - (i/defconst M_LN10) - (i/defconst M_PI) - (i/defconst M_PI_2) - (i/defconst M_PI_4) - (i/defconst M_1_PI) - (i/defconst M_2_PI) - (i/defconst M_2_SQRTPI) - (i/defconst M_SQRT2) - (i/defconst M_SQRT1_2) - (i/defcfn nan) - (i/defcfn ceil) - (i/defcfn floor) - (i/defcfn nearbyint) - (i/defcfn rint) \ No newline at end of file + (i/defcfn rint)) \ No newline at end of file From 6a1068d531bf8244d0d0129b545e4e993f6958ac Mon Sep 17 00:00:00 2001 From: keymone Date: Thu, 18 Feb 2016 10:11:37 +0100 Subject: [PATCH 06/32] delete using find flag --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b22afa0d..c77d934b 100644 --- a/Makefile +++ b/Makefile @@ -114,7 +114,7 @@ 
compile_src: find * -name "*.pxi" | grep "^pixie/" | xargs -L1 ./pixie-vm $(EXTERNALS_FLAGS) -c clean_pxic: - find * -name "*.pxic" | xargs --no-run-if-empty rm + find * -name "*.pxic" -delete clean: clean_pxic rm -rf ./lib From 8bcad2f6fa88d6c87383ae946825ea4a2c8bf9cf Mon Sep 17 00:00:00 2001 From: keymone Date: Thu, 18 Feb 2016 10:13:32 +0100 Subject: [PATCH 07/32] import version fun --- pixie/regex.pxi | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pixie/regex.pxi b/pixie/regex.pxi index 1bc35435..b4694aa1 100644 --- a/pixie/regex.pxi +++ b/pixie/regex.pxi @@ -2,11 +2,8 @@ (:require [pixie.ffi-infer :as i])) (i/with-config {:library "cre2" - :cxx-flags [" && pwd" - "-Lexternals/cre2/build/.libs" + :cxx-flags ["-Lexternals/cre2/build/.libs" "-lcre2" "-Iexternals/cre2/src"] :includes ["cre2.h"]} - (i/defconst M_E) - - (i/defcfn rint)) \ No newline at end of file + (i/defcfn cre2_version_string)) \ No newline at end of file From 495d090c08541596b9b3b727cbdabb2f3d03e1ce Mon Sep 17 00:00:00 2001 From: keymone Date: Thu, 18 Feb 2016 11:45:43 +0100 Subject: [PATCH 08/32] drop regexp from stdlib, regex literals only work if pixie.regex/regexp is referred directly --- pixie/regex.pxi | 63 +++++++++++++++++++++++++++++++++++++++++++++++- pixie/stdlib.pxi | 7 ------ 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/pixie/regex.pxi b/pixie/regex.pxi index b4694aa1..e422d2de 100644 --- a/pixie/regex.pxi +++ b/pixie/regex.pxi @@ -6,4 +6,65 @@ "-lcre2" "-Iexternals/cre2/src"] :includes ["cre2.h"]} - (i/defcfn cre2_version_string)) \ No newline at end of file + + (i/defcstruct cre2_string_t [:data :length]) + (i/defcfn cre2_version_string) + + ;; Options + (i/defcfn cre2_opt_new) + (i/defcfn cre2_opt_delete) + (i/defcfn cre2_opt_set_posix_syntax) + (i/defcfn cre2_opt_set_longest_match) + (i/defcfn cre2_opt_set_log_errors) + (i/defcfn cre2_opt_set_literal) + (i/defcfn cre2_opt_set_never_nl) + (i/defcfn cre2_opt_set_case_sensitive) + 
(i/defcfn cre2_opt_set_perl_classes) + (i/defcfn cre2_opt_set_word_boundary) + (i/defcfn cre2_opt_set_one_line) + (i/defcfn cre2_opt_set_max_mem) + (i/defcfn cre2_opt_set_encoding) + + ;; Construction / destruction + (i/defcfn cre2_new) + (i/defcfn cre2_delete) + + ;; Inspection + (i/defcfn cre2_pattern) + (i/defcfn cre2_error_code) + (i/defcfn cre2_num_capturing_groups) + (i/defcfn cre2_program_size) + + ;; Errors something? + (i/defcfn cre2_error_string) + (i/defcfn cre2_error_arg) + + ;; Matching + (i/defcstruct cre2_range_t [:start :past]) + (i/defcfn cre2_match) + (i/defcfn cre2_easy_match) + (i/defcfn cre2_strings_to_ranges) +) + +(def optmap + { :ascii #(cre2_set_encoding % 2) + :posix #(cre2_opt_set_posix_syntax % 1) + :longest_match #(cre2_opt_set_longest_match % 1) + :silent #(cre2_opt_set_log_errors % 0) + :literal #(cre2_opt_set_literal % 1) + :never_nl #(cre2_opt_set_never_nl % 1) + :dot_nl #(cre2_opt_set_one_line % 0) + :never_capture #(do %) ;; ?? + :ignore_case #(cre2_opt_set_case_sensitive % 0) }) + +(defn- cre2-opts [opts] + (let [opt (cre2_opt_new)] + (doseq [key opts] ((key optmap) opt)) + opt)) + +(defn regexp + {:doc "Returns internal representation for regular + expression, used in matching functions." + :signatures [[rexegp-str opts]]} + [regexp-str opts] + (cre2_new regexp-str (count regexp-str) (cre2-opts opts))) diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi index 16a03c48..311df7fb 100644 --- a/pixie/stdlib.pxi +++ b/pixie/stdlib.pxi @@ -3073,13 +3073,6 @@ ex: (vary-meta x assoc :foo 42)" ret) val))))) -(defn regexp - {:doc "Returns internal representation for regular - expression, used in matching functions." 
- :signatures [[rexegp-str opts]]} - [regexp-str opts] - (println (str regexp-str " " opts))) - (deftype Iterate [f x] IReduce (-reduce [self rf init] From 95c2e2a10c59deb877d1d531a6efd29d203fefe3 Mon Sep 17 00:00:00 2001 From: keymone Date: Thu, 18 Feb 2016 12:19:36 +0100 Subject: [PATCH 09/32] match function --- pixie/regex.pxi | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pixie/regex.pxi b/pixie/regex.pxi index e422d2de..5e74a6b6 100644 --- a/pixie/regex.pxi +++ b/pixie/regex.pxi @@ -68,3 +68,15 @@ :signatures [[rexegp-str opts]]} [regexp-str opts] (cre2_new regexp-str (count regexp-str) (cre2-opts opts))) + +(defn match + [pattern text] + (cre2_match + pattern + text + (count text) + 0 + (count text) + 1 ;; anchor 1 - no, 2 - start, 3 - both + (cre2_string_t) + (+ 1 (cre2_num_capturing_groups pattern)))) From 31a95b69b533294e8ba9b80d4c0b985d8c39f597 Mon Sep 17 00:00:00 2001 From: keymone Date: Thu, 18 Feb 2016 12:40:21 +0100 Subject: [PATCH 10/32] fix tar unzip flag --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4eddae75..e872295c 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ $(EXTERNALS)/re2: $(EXTERNALS) curl -sL https://github.com/google/re2/archive/2016-02-01.tar.gz > re2.tar.gz && \ mkdir -p re2 && \ cd re2 && \ - tar -jxf ../re2.tar.gz --strip-components=1 + tar -zxf ../re2.tar.gz --strip-components=1 $(EXTERNALS)/re2/obj/libre2.a: $(EXTERNALS)/re2 cd $(EXTERNALS)/re2 && make @@ -74,7 +74,7 @@ $(EXTERNALS)/cre2: curl -sL https://github.com/keymone/cre2/archive/f1157647f9ca3ef11fd6447433f36e7c7bd64d09.tar.gz > cre2.tar.xz && \ mkdir -p cre2 && \ cd cre2 && \ - tar -jxf ../cre2.tar.xz --strip-components=1 + tar -zxf ../cre2.tar.xz --strip-components=1 $(EXTERNALS)/cre2/build/.libs/libcre2.a: $(EXTERNALS)/cre2 cd $(EXTERNALS)/cre2 && \ From 5b0e8805679e17c8fe5ffc71fd8fa720a9698a3a Mon Sep 17 00:00:00 2001 From: keymone Date: Thu, 18 Feb 2016 20:57:14 +0100 Subject: 
[PATCH 11/32] initial attempt at protocols and records --- pixie/{regex.pxi => re.pxi} | 44 +++++++++++++++++++++++++++++++------ pixie/vm/reader.py | 3 ++- 2 files changed, 39 insertions(+), 8 deletions(-) rename pixie/{regex.pxi => re.pxi} (72%) diff --git a/pixie/regex.pxi b/pixie/re.pxi similarity index 72% rename from pixie/regex.pxi rename to pixie/re.pxi index 5e74a6b6..a7e7a0bd 100644 --- a/pixie/regex.pxi +++ b/pixie/re.pxi @@ -1,4 +1,4 @@ -(ns pixie.regex +(ns pixie.re (:require [pixie.ffi-infer :as i])) (i/with-config {:library "cre2" @@ -62,12 +62,6 @@ (doseq [key opts] ((key optmap) opt)) opt)) -(defn regexp - {:doc "Returns internal representation for regular - expression, used in matching functions." - :signatures [[rexegp-str opts]]} - [regexp-str opts] - (cre2_new regexp-str (count regexp-str) (cre2-opts opts))) (defn match [pattern text] @@ -80,3 +74,39 @@ 1 ;; anchor 1 - no, 2 - start, 3 - both (cre2_string_t) (+ 1 (cre2_num_capturing_groups pattern)))) + +(defn regex + {:doc "Returns internal representation for regular + expression, used in matching functions." + :signatures [[rexeg-str opts]]} + [regex-str opts] + (re-pattern regex-str opts)) + +(defprotocol IRegex + (re-matches [r s]) + (re-find [r s])) + +(defrecord CRE2Regex [pattern opts] + IFinalize + (-finalize! 
[this] + (println "dropping cre2 obj " this) + (cre2_opt_delete opts) + (cre2_delete pattern)) + + IRegex) + +(def ^:dynamic *default-re-engine* :cre2) + +;; an "open" engine registry +(defmulti re-engine (fn [k s o] k)) + +;; add cre2 to registry +(defmethod re-engine :cre2 [_ regex-str opts] + (let [copts (cre2-opts opts)] + (->CRE2Regex (cre2_new regex-str (count regex-str) copts) copts))) + +;; dispatch on the right engine constructor via registry +(defn re-pattern + ([s o] (re-pattern s o *default-re-engine*)) + ([s o kw] (re-engine kw s o))) + diff --git a/pixie/vm/reader.py b/pixie/vm/reader.py index d05998d4..9f313de2 100644 --- a/pixie/vm/reader.py +++ b/pixie/vm/reader.py @@ -648,7 +648,8 @@ def invoke(self, rdr, ch): except EOFError: break - return rt.cons(symbol(u"regexp"), rt.cons(regex_str, rt.cons(regex_opts, nil))) + return rt.cons(symbol(u"pixie.re/regex"), + rt.cons(regex_str, rt.cons(regex_opts, nil))) dispatch_handlers = { u"{": SetReader(), From eaedfb34b17d0d60bde16a2064ce2e63813dae90 Mon Sep 17 00:00:00 2001 From: keymone Date: Fri, 19 Feb 2016 10:33:45 +0100 Subject: [PATCH 12/32] swappable engine and finalizable regex object - some renames - defprotocol IRegex - deftype CRE2Regex implementing IFinalize and IRegex --- pixie/re.pxi | 54 +++++++++++++++++++++------------------------------- 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/pixie/re.pxi b/pixie/re.pxi index a7e7a0bd..d0cb2d8d 100644 --- a/pixie/re.pxi +++ b/pixie/re.pxi @@ -46,7 +46,7 @@ (i/defcfn cre2_strings_to_ranges) ) -(def optmap +(def cre2_optmap { :ascii #(cre2_set_encoding % 2) :posix #(cre2_opt_set_posix_syntax % 1) :longest_match #(cre2_opt_set_longest_match % 1) @@ -57,43 +57,32 @@ :never_capture #(do %) ;; ?? 
:ignore_case #(cre2_opt_set_case_sensitive % 0) }) -(defn- cre2-opts [opts] +(defn cre2_make_opts [opts] (let [opt (cre2_opt_new)] - (doseq [key opts] ((key optmap) opt)) + (doseq [key opts] ((key cre2_optmap) opt)) opt)) - -(defn match +(defn cre2_run_match [pattern text] - (cre2_match - pattern - text - (count text) - 0 - (count text) - 1 ;; anchor 1 - no, 2 - start, 3 - both - (cre2_string_t) - (+ 1 (cre2_num_capturing_groups pattern)))) - -(defn regex - {:doc "Returns internal representation for regular - expression, used in matching functions." - :signatures [[rexeg-str opts]]} - [regex-str opts] - (re-pattern regex-str opts)) + (= 1 + (cre2_match pattern + text (count text) + 0 (count text) + 1 ;; anchor 1 - no, 2 - start, 3 - both + (cre2_string_t) + (+ 1 (cre2_num_capturing_groups pattern))))) (defprotocol IRegex - (re-matches [r s]) - (re-find [r s])) + (re-matches [r t])) -(defrecord CRE2Regex [pattern opts] +(deftype CRE2Regex [pattern opts] IFinalize (-finalize! [this] - (println "dropping cre2 obj " this) (cre2_opt_delete opts) (cre2_delete pattern)) - IRegex) + IRegex + (re-matches [_ text] (cre2_run_match pattern text))) (def ^:dynamic *default-re-engine* :cre2) @@ -102,11 +91,12 @@ ;; add cre2 to registry (defmethod re-engine :cre2 [_ regex-str opts] - (let [copts (cre2-opts opts)] + (let [copts (cre2_make_opts opts)] (->CRE2Regex (cre2_new regex-str (count regex-str) copts) copts))) -;; dispatch on the right engine constructor via registry -(defn re-pattern - ([s o] (re-pattern s o *default-re-engine*)) - ([s o kw] (re-engine kw s o))) - +(defn regex + {:doc "Returns internal representation for regular + expression, used in matching functions." 
+ :signatures [[rexeg-str opts]]} + ([pattern opts] (regex pattern opts *default-re-engine*)) + ([pattern opts engine] (re-engine engine pattern opts))) From c9de43f5472e6b32d3012b42c866e3c3e2c7dc4a Mon Sep 17 00:00:00 2001 From: keymone Date: Fri, 19 Feb 2016 10:50:24 +0100 Subject: [PATCH 13/32] use latest cre2 release, verify checksums of re2 and cre2 --- Makefile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index e872295c..91fe270a 100644 --- a/Makefile +++ b/Makefile @@ -62,6 +62,7 @@ $(EXTERNALS)/pypy: $(EXTERNALS) $(EXTERNALS)/re2: $(EXTERNALS) cd $(EXTERNALS) && \ curl -sL https://github.com/google/re2/archive/2016-02-01.tar.gz > re2.tar.gz && \ + shasum -a 256 cre2.tar.xz | grep -q f246c43897ac341568a7460622138ec0dd8de9b6f5459686376fa23e9d8c1bb8 && \ mkdir -p re2 && \ cd re2 && \ tar -zxf ../re2.tar.gz --strip-components=1 @@ -69,19 +70,20 @@ $(EXTERNALS)/re2: $(EXTERNALS) $(EXTERNALS)/re2/obj/libre2.a: $(EXTERNALS)/re2 cd $(EXTERNALS)/re2 && make -$(EXTERNALS)/cre2: +$(EXTERNALS)/cre2: $(EXTERNALS) cd $(EXTERNALS) && \ - curl -sL https://github.com/keymone/cre2/archive/f1157647f9ca3ef11fd6447433f36e7c7bd64d09.tar.gz > cre2.tar.xz && \ + curl -sL https://bitbucket.org/marcomaggi/cre2/downloads/cre2-0.2.0.tar.xz > cre2.tar.xz && \ + shasum -a 256 cre2.tar.xz | grep -q d31118dbc9d2b1cf95c1b763ca92ae2ec4e262b1f8d8e995c1ffdc8eb40a82fc && \ mkdir -p cre2 && \ cd cre2 && \ tar -zxf ../cre2.tar.xz --strip-components=1 $(EXTERNALS)/cre2/build/.libs/libcre2.a: $(EXTERNALS)/cre2 cd $(EXTERNALS)/cre2 && \ - LIBTOOLIZE=`env which -a libtoolize glibtoolize | head -n1` sh autogen.sh && \ mkdir -p build && \ cd build && \ - ../configure --enable-maintainer-mode LDFLAGS="-L`pwd`/../../re2/obj" CPPFLAGS="-I`pwd`/../../re2" && \ + ../configure LDFLAGS="-L`pwd`/../../re2/obj" CPPFLAGS="-I`pwd`/../../re2" && \ + chmod +x ../meta/autotools/install-sh && \ make re2: $(EXTERNALS)/re2/obj/libre2.a @@ -93,7 +95,6 @@ re2_cre2: 
re2 cre2 run: ./pixie-vm - run_interactive: @PYTHONPATH=$(PYTHONPATH) $(PYTHON) target.py From f126af82d1c5ce7b117e7da64fcb455cf3f0117a Mon Sep 17 00:00:00 2001 From: keymone Date: Fri, 19 Feb 2016 10:58:51 +0100 Subject: [PATCH 14/32] checksum the right file --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 91fe270a..8ff78b4b 100644 --- a/Makefile +++ b/Makefile @@ -62,7 +62,7 @@ $(EXTERNALS)/pypy: $(EXTERNALS) $(EXTERNALS)/re2: $(EXTERNALS) cd $(EXTERNALS) && \ curl -sL https://github.com/google/re2/archive/2016-02-01.tar.gz > re2.tar.gz && \ - shasum -a 256 cre2.tar.xz | grep -q f246c43897ac341568a7460622138ec0dd8de9b6f5459686376fa23e9d8c1bb8 && \ + shasum -a 256 re2.tar.gz | grep -q f246c43897ac341568a7460622138ec0dd8de9b6f5459686376fa23e9d8c1bb8 && \ mkdir -p re2 && \ cd re2 && \ tar -zxf ../re2.tar.gz --strip-components=1 From 846fd238dcf1068d27be3704d1b58093d4b36f71 Mon Sep 17 00:00:00 2001 From: keymone Date: Fri, 19 Feb 2016 11:02:56 +0100 Subject: [PATCH 15/32] fix tar flag for xz unzipping --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8ff78b4b..358f5022 100644 --- a/Makefile +++ b/Makefile @@ -76,7 +76,7 @@ $(EXTERNALS)/cre2: $(EXTERNALS) shasum -a 256 cre2.tar.xz | grep -q d31118dbc9d2b1cf95c1b763ca92ae2ec4e262b1f8d8e995c1ffdc8eb40a82fc && \ mkdir -p cre2 && \ cd cre2 && \ - tar -zxf ../cre2.tar.xz --strip-components=1 + tar -Jxf ../cre2.tar.xz --strip-components=1 $(EXTERNALS)/cre2/build/.libs/libcre2.a: $(EXTERNALS)/cre2 cd $(EXTERNALS)/cre2 && \ From 81a048412cd29586b7e6089b8e544e0007b31cdd Mon Sep 17 00:00:00 2001 From: keymone Date: Fri, 19 Feb 2016 11:16:50 +0100 Subject: [PATCH 16/32] try to get travis install newer g++ --- .travis.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.travis.yml b/.travis.yml index 77846892..f4b2cd62 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,8 @@ +language: cpp +compiler: + - g++ + - 
clang + sudo: false env: - JIT_OPTS='--opt=jit' TARGET_OPTS='target.py' From bab45374a4bf0573229470713587987873aae5cc Mon Sep 17 00:00:00 2001 From: keymone Date: Fri, 19 Feb 2016 11:38:23 +0100 Subject: [PATCH 17/32] install g++4.8 --- .travis.yml | 22 +++++++++++++--------- Makefile | 23 +++++++++-------------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/.travis.yml b/.travis.yml index f4b2cd62..00e4b840 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,4 @@ language: cpp -compiler: - - g++ - - clang sudo: false env: @@ -9,7 +6,10 @@ env: - JIT_OPTS='' TARGET_OPTS='target.py' matrix: - fast_finish: true + fast_finish: true + +install: + - if [ "$CXX" = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi script: - make PYTHON=python build @@ -19,12 +19,16 @@ script: addons: apt: + sources: + - ubuntu-toolchain-r-test packages: - - libffi-dev - - libedit-dev - - libboost-all-dev - - zlib1g-dev - - zlib-bin + - libffi-dev + - libedit-dev + - libboost-all-dev + - zlib1g-dev + - zlib-bin + - gcc-4.8 + - g++-4.8 notifications: irc: "chat.freenode.net#pixie-lang" diff --git a/Makefile b/Makefile index 358f5022..c660fe0b 100644 --- a/Makefile +++ b/Makefile @@ -18,17 +18,17 @@ help: @echo "make build_no_jit - build without jit" @echo "make fetch_externals - download and unpack external deps" -build_with_jit: fetch_externals +build_with_jit: fetch_externals re2_cre2 @if [ ! -d /usr/local/include/boost -a ! -d /usr/include/boost ] ; then echo "Boost C++ Library not found" && false; fi && \ $(PYTHON) $(EXTERNALS)/pypy/rpython/bin/rpython $(COMMON_BUILD_OPTS) --opt=jit target.py && \ make compile_basics -build_no_jit: fetch_externals +build_no_jit: fetch_externals re2_cre2 @if [ ! -d /usr/local/include/boost -a ! 
-d /usr/include/boost ] ; then echo "Boost C++ Library not found" && false; fi && \ $(PYTHON) $(EXTERNALS)/pypy/rpython/bin/rpython $(COMMON_BUILD_OPTS) target.py && \ make compile_basics -build_no_jit_shared: fetch_externals +build_no_jit_shared: fetch_externals re2_cre2 @if [ ! -d /usr/local/include/boost -a ! -d /usr/include/boost ] ; then echo "Boost C++ Library not found" && false; fi && \ $(PYTHON) $(EXTERNALS)/pypy/rpython/bin/rpython $(COMMON_BUILD_OPTS) --shared target.py && \ make compile_basics @@ -36,7 +36,7 @@ build_no_jit_shared: fetch_externals compile_basics: @echo -e "\n\n\n\nWARNING: Compiling core libs. If you want to modify one of these files delete the .pxic files first\n\n\n\n" - ./pixie-vm -c pixie/uv.pxi -c pixie/io.pxi -c pixie/stacklets.pxi -c pixie/stdlib.pxi -c pixie/repl.pxi + ./pixie-vm -c pixie/uv.pxi -c pixie/io.pxi -c pixie/stacklets.pxi -c pixie/stdlib.pxi -c pixie/repl.pxi -c pixie/re.pxi build: fetch_externals re2_cre2 $(PYTHON) $(EXTERNALS)/pypy/rpython/bin/rpython $(COMMON_BUILD_OPTS) $(JIT_OPTS) $(TARGET_OPTS) @@ -65,10 +65,8 @@ $(EXTERNALS)/re2: $(EXTERNALS) shasum -a 256 re2.tar.gz | grep -q f246c43897ac341568a7460622138ec0dd8de9b6f5459686376fa23e9d8c1bb8 && \ mkdir -p re2 && \ cd re2 && \ - tar -zxf ../re2.tar.gz --strip-components=1 - -$(EXTERNALS)/re2/obj/libre2.a: $(EXTERNALS)/re2 - cd $(EXTERNALS)/re2 && make + tar -zxf ../re2.tar.gz --strip-components=1 && \ + make $(EXTERNALS)/cre2: $(EXTERNALS) cd $(EXTERNALS) && \ @@ -76,19 +74,16 @@ $(EXTERNALS)/cre2: $(EXTERNALS) shasum -a 256 cre2.tar.xz | grep -q d31118dbc9d2b1cf95c1b763ca92ae2ec4e262b1f8d8e995c1ffdc8eb40a82fc && \ mkdir -p cre2 && \ cd cre2 && \ - tar -Jxf ../cre2.tar.xz --strip-components=1 - -$(EXTERNALS)/cre2/build/.libs/libcre2.a: $(EXTERNALS)/cre2 - cd $(EXTERNALS)/cre2 && \ + tar -Jxf ../cre2.tar.xz --strip-components=1 && \ mkdir -p build && \ cd build && \ ../configure LDFLAGS="-L`pwd`/../../re2/obj" CPPFLAGS="-I`pwd`/../../re2" && \ chmod +x 
../meta/autotools/install-sh && \ make -re2: $(EXTERNALS)/re2/obj/libre2.a +re2: $(EXTERNALS)/re2 -cre2: $(EXTERNALS)/cre2/build/.libs/libcre2.a +cre2: $(EXTERNALS)/cre2 re2_cre2: re2 cre2 From 318e893b2d8fa20ed0bfc4029f6dffa9b031be75 Mon Sep 17 00:00:00 2001 From: keymone Date: Fri, 19 Feb 2016 12:02:18 +0100 Subject: [PATCH 18/32] compile re2 with -fPIC --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c660fe0b..975a660b 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ $(EXTERNALS)/re2: $(EXTERNALS) mkdir -p re2 && \ cd re2 && \ tar -zxf ../re2.tar.gz --strip-components=1 && \ - make + make CPPFLAGS="-fPIC" $(EXTERNALS)/cre2: $(EXTERNALS) cd $(EXTERNALS) && \ From 0d97aa3af5e168a34002f5644ee2188ae2d53215 Mon Sep 17 00:00:00 2001 From: keymone Date: Fri, 19 Feb 2016 13:49:39 +0100 Subject: [PATCH 19/32] extract cre2 out of pixie.re --- pixie/cre2.pxi | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++ pixie/re.pxi | 85 ++----------------------------------------------- 2 files changed, 88 insertions(+), 83 deletions(-) create mode 100644 pixie/cre2.pxi diff --git a/pixie/cre2.pxi b/pixie/cre2.pxi new file mode 100644 index 00000000..a51512f8 --- /dev/null +++ b/pixie/cre2.pxi @@ -0,0 +1,86 @@ +(ns pixie.cre2 + (:require [pixie.ffi-infer :as f])) + +(f/with-config {:library "cre2" + :cxx-flags ["-Lexternals/cre2/build/.libs" + "-lcre2" + "-Iexternals/cre2/src"] + :includes ["cre2.h"]} + + (f/defcstruct cre2_string_t [:data :length]) + (f/defcfn cre2_version_string) + + ;; Options + (f/defcfn cre2_opt_new) + (f/defcfn cre2_opt_delete) + (f/defcfn cre2_opt_set_posix_syntax) + (f/defcfn cre2_opt_set_longest_match) + (f/defcfn cre2_opt_set_log_errors) + (f/defcfn cre2_opt_set_literal) + (f/defcfn cre2_opt_set_never_nl) + (f/defcfn cre2_opt_set_case_sensitive) + (f/defcfn cre2_opt_set_perl_classes) + (f/defcfn cre2_opt_set_word_boundary) + (f/defcfn cre2_opt_set_one_line) + (f/defcfn cre2_opt_set_max_mem) + 
(f/defcfn cre2_opt_set_encoding) + + ;; Construction / destruction + (f/defcfn cre2_new) + (f/defcfn cre2_delete) + + ;; Inspection + (f/defcfn cre2_pattern) + (f/defcfn cre2_error_code) + (f/defcfn cre2_num_capturing_groups) + (f/defcfn cre2_program_size) + + ;; Errors something? + (f/defcfn cre2_error_string) + (f/defcfn cre2_error_arg) + + ;; Matching + (f/defcstruct cre2_range_t [:start :past]) + (f/defcfn cre2_match) + (f/defcfn cre2_easy_match) + (f/defcfn cre2_strings_to_ranges) +) + +(def cre2_optmap + { :ascii #(cre2_set_encoding % 2) + :posix #(cre2_opt_set_posix_syntax % 1) + :longest_match #(cre2_opt_set_longest_match % 1) + :silent #(cre2_opt_set_log_errors % 0) + :literal #(cre2_opt_set_literal % 1) + :never_nl #(cre2_opt_set_never_nl % 1) + :dot_nl #(cre2_opt_set_one_line % 0) + :never_capture #(do %) ;; ?? + :ignore_case #(cre2_opt_set_case_sensitive % 0) }) + +(defn cre2_make_opts [opts] + (let [opt (cre2_opt_new)] + (doseq [key opts] ((key cre2_optmap) opt)) + opt)) + +(defn cre2_run_match + [pattern text] + (= 1 + (cre2_match pattern + text (count text) + 0 (count text) + 1 ;; anchor 1 - no, 2 - start, 3 - both + (cre2_string_t) + (+ 1 (cre2_num_capturing_groups pattern))))) + +(deftype CRE2Regex [pattern opts] + IFinalize + (-finalize! 
[this] + (cre2_opt_delete opts) + (cre2_delete pattern)) + + IRegex + (re-matches [_ text] (cre2_run_match pattern text))) + +(defn make-re [pattern opts] + (let [copts (cre2_make_opts opts)] + (->CRE2Regex (cre2_new regex-str (count regex-str) copts) copts))) \ No newline at end of file diff --git a/pixie/re.pxi b/pixie/re.pxi index d0cb2d8d..f1775814 100644 --- a/pixie/re.pxi +++ b/pixie/re.pxi @@ -1,89 +1,9 @@ (ns pixie.re - (:require [pixie.ffi-infer :as i])) - -(i/with-config {:library "cre2" - :cxx-flags ["-Lexternals/cre2/build/.libs" - "-lcre2" - "-Iexternals/cre2/src"] - :includes ["cre2.h"]} - - (i/defcstruct cre2_string_t [:data :length]) - (i/defcfn cre2_version_string) - - ;; Options - (i/defcfn cre2_opt_new) - (i/defcfn cre2_opt_delete) - (i/defcfn cre2_opt_set_posix_syntax) - (i/defcfn cre2_opt_set_longest_match) - (i/defcfn cre2_opt_set_log_errors) - (i/defcfn cre2_opt_set_literal) - (i/defcfn cre2_opt_set_never_nl) - (i/defcfn cre2_opt_set_case_sensitive) - (i/defcfn cre2_opt_set_perl_classes) - (i/defcfn cre2_opt_set_word_boundary) - (i/defcfn cre2_opt_set_one_line) - (i/defcfn cre2_opt_set_max_mem) - (i/defcfn cre2_opt_set_encoding) - - ;; Construction / destruction - (i/defcfn cre2_new) - (i/defcfn cre2_delete) - - ;; Inspection - (i/defcfn cre2_pattern) - (i/defcfn cre2_error_code) - (i/defcfn cre2_num_capturing_groups) - (i/defcfn cre2_program_size) - - ;; Errors something? - (i/defcfn cre2_error_string) - (i/defcfn cre2_error_arg) - - ;; Matching - (i/defcstruct cre2_range_t [:start :past]) - (i/defcfn cre2_match) - (i/defcfn cre2_easy_match) - (i/defcfn cre2_strings_to_ranges) -) - -(def cre2_optmap - { :ascii #(cre2_set_encoding % 2) - :posix #(cre2_opt_set_posix_syntax % 1) - :longest_match #(cre2_opt_set_longest_match % 1) - :silent #(cre2_opt_set_log_errors % 0) - :literal #(cre2_opt_set_literal % 1) - :never_nl #(cre2_opt_set_never_nl % 1) - :dot_nl #(cre2_opt_set_one_line % 0) - :never_capture #(do %) ;; ?? 
- :ignore_case #(cre2_opt_set_case_sensitive % 0) }) - -(defn cre2_make_opts [opts] - (let [opt (cre2_opt_new)] - (doseq [key opts] ((key cre2_optmap) opt)) - opt)) - -(defn cre2_run_match - [pattern text] - (= 1 - (cre2_match pattern - text (count text) - 0 (count text) - 1 ;; anchor 1 - no, 2 - start, 3 - both - (cre2_string_t) - (+ 1 (cre2_num_capturing_groups pattern))))) + (:require [pixie.cre2])) (defprotocol IRegex (re-matches [r t])) -(deftype CRE2Regex [pattern opts] - IFinalize - (-finalize! [this] - (cre2_opt_delete opts) - (cre2_delete pattern)) - - IRegex - (re-matches [_ text] (cre2_run_match pattern text))) - (def ^:dynamic *default-re-engine* :cre2) ;; an "open" engine registry @@ -91,8 +11,7 @@ ;; add cre2 to registry (defmethod re-engine :cre2 [_ regex-str opts] - (let [copts (cre2_make_opts opts)] - (->CRE2Regex (cre2_new regex-str (count regex-str) copts) copts))) + (pixie.cre2/make-re regex-str opts))) (defn regex {:doc "Returns internal representation for regular From 466489a6c0be68663f07fabf6419e535367cdbba Mon Sep 17 00:00:00 2001 From: keymone Date: Fri, 19 Feb 2016 14:02:31 +0100 Subject: [PATCH 20/32] move pixie.cre2 into pixie.re.cre2, move everything cre2 related out of pixie.re --- pixie/re.pxi | 7 +------ pixie/{ => re}/cre2.pxi | 12 +++++++----- 2 files changed, 8 insertions(+), 11 deletions(-) rename pixie/{ => re}/cre2.pxi (90%) diff --git a/pixie/re.pxi b/pixie/re.pxi index f1775814..9f07352c 100644 --- a/pixie/re.pxi +++ b/pixie/re.pxi @@ -1,5 +1,4 @@ -(ns pixie.re - (:require [pixie.cre2])) +(ns pixie.re) (defprotocol IRegex (re-matches [r t])) @@ -9,10 +8,6 @@ ;; an "open" engine registry (defmulti re-engine (fn [k s o] k)) -;; add cre2 to registry -(defmethod re-engine :cre2 [_ regex-str opts] - (pixie.cre2/make-re regex-str opts))) - (defn regex {:doc "Returns internal representation for regular expression, used in matching functions." 
diff --git a/pixie/cre2.pxi b/pixie/re/cre2.pxi similarity index 90% rename from pixie/cre2.pxi rename to pixie/re/cre2.pxi index a51512f8..6ff2eeff 100644 --- a/pixie/cre2.pxi +++ b/pixie/re/cre2.pxi @@ -1,5 +1,6 @@ -(ns pixie.cre2 - (:require [pixie.ffi-infer :as f])) +(ns pixie.re.cre2 + (:require [pixie.ffi-infer :as f] + [pixie.re :as re])) (f/with-config {:library "cre2" :cxx-flags ["-Lexternals/cre2/build/.libs" @@ -78,9 +79,10 @@ (cre2_opt_delete opts) (cre2_delete pattern)) - IRegex - (re-matches [_ text] (cre2_run_match pattern text))) + re/IRegex + (re/re-matches [_ text] (cre2_run_match pattern text))) -(defn make-re [pattern opts] +;; add cre2 to registry +(defmethod re/re-engine :cre2 [_ regex-str opts] (let [copts (cre2_make_opts opts)] (->CRE2Regex (cre2_new regex-str (count regex-str) copts) copts))) \ No newline at end of file From b9907ad04dad175c394636cd4b8ba6893479313d Mon Sep 17 00:00:00 2001 From: keymone Date: Fri, 19 Feb 2016 14:22:01 +0100 Subject: [PATCH 21/32] link cre2 into lib/ and include/ --- Makefile | 6 +++++- pixie/re/cre2.pxi | 3 --- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 975a660b..64d2340e 100644 --- a/Makefile +++ b/Makefile @@ -78,12 +78,16 @@ $(EXTERNALS)/cre2: $(EXTERNALS) mkdir -p build && \ cd build && \ ../configure LDFLAGS="-L`pwd`/../../re2/obj" CPPFLAGS="-I`pwd`/../../re2" && \ - chmod +x ../meta/autotools/install-sh && \ + chmod +x ../meta/autotools/install-sh && \ make re2: $(EXTERNALS)/re2 cre2: $(EXTERNALS)/cre2 + mkdir -p lib/ include/ && \ + ln -sf ../$(EXTERNALS)/cre2/src/cre2.h include/ && \ + cd lib && \ + ln -sf ../$(EXTERNALS)/cre2/build/.libs/* ./ re2_cre2: re2 cre2 diff --git a/pixie/re/cre2.pxi b/pixie/re/cre2.pxi index 6ff2eeff..8f0d52c1 100644 --- a/pixie/re/cre2.pxi +++ b/pixie/re/cre2.pxi @@ -3,9 +3,6 @@ [pixie.re :as re])) (f/with-config {:library "cre2" - :cxx-flags ["-Lexternals/cre2/build/.libs" - "-lcre2" - "-Iexternals/cre2/src"] :includes 
["cre2.h"]} (f/defcstruct cre2_string_t [:data :length]) From 1b8a296f9d0fafdeed1afe9a27184bb785c5422c Mon Sep 17 00:00:00 2001 From: keymone Date: Fri, 19 Feb 2016 14:29:14 +0100 Subject: [PATCH 22/32] compile pixie/re/cre2.pxi during build --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 64d2340e..ac8dbf55 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ build_no_jit_shared: fetch_externals re2_cre2 compile_basics: @echo -e "\n\n\n\nWARNING: Compiling core libs. If you want to modify one of these files delete the .pxic files first\n\n\n\n" - ./pixie-vm -c pixie/uv.pxi -c pixie/io.pxi -c pixie/stacklets.pxi -c pixie/stdlib.pxi -c pixie/repl.pxi -c pixie/re.pxi + ./pixie-vm -c pixie/uv.pxi -c pixie/io.pxi -c pixie/stacklets.pxi -c pixie/stdlib.pxi -c pixie/repl.pxi -c pixie/re.pxi -c pixie/re/cre2.pxi build: fetch_externals re2_cre2 $(PYTHON) $(EXTERNALS)/pypy/rpython/bin/rpython $(COMMON_BUILD_OPTS) $(JIT_OPTS) $(TARGET_OPTS) From 8a044d09bb54303e04919bc22dbcc00f0b2e8a6c Mon Sep 17 00:00:00 2001 From: keymone Date: Sun, 21 Feb 2016 14:59:53 +0100 Subject: [PATCH 23/32] define re-* in stdlib --- pixie/re.pxi | 14 +++++++++----- pixie/re/cre2.pxi | 9 ++++----- pixie/stdlib.pxi | 2 ++ pixie/vm/reader.py | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pixie/re.pxi b/pixie/re.pxi index 9f07352c..e394c705 100644 --- a/pixie/re.pxi +++ b/pixie/re.pxi @@ -1,16 +1,20 @@ -(ns pixie.re) +(in-ns :pixie.stdlib) (defprotocol IRegex - (re-matches [r t])) + (re-matches [r t]) + (re-find [r t]) + (re-seq [r t])) -(def ^:dynamic *default-re-engine* :cre2) +(def ^:dynamic *default-re-engine* 'pixie.re.cre2) ;; an "open" engine registry (defmulti re-engine (fn [k s o] k)) -(defn regex +(defn re-pattern {:doc "Returns internal representation for regular expression, used in matching functions." 
:signatures [[rexeg-str opts]]} - ([pattern opts] (regex pattern opts *default-re-engine*)) + ([pattern opts] (re-pattern pattern opts *default-re-engine*)) ([pattern opts engine] (re-engine engine pattern opts))) + +(load-ns *default-re-engine*) diff --git a/pixie/re/cre2.pxi b/pixie/re/cre2.pxi index 8f0d52c1..5ec39c74 100644 --- a/pixie/re/cre2.pxi +++ b/pixie/re/cre2.pxi @@ -1,6 +1,5 @@ (ns pixie.re.cre2 - (:require [pixie.ffi-infer :as f] - [pixie.re :as re])) + (:require [pixie.ffi-infer :as f])) (f/with-config {:library "cre2" :includes ["cre2.h"]} @@ -76,10 +75,10 @@ (cre2_opt_delete opts) (cre2_delete pattern)) - re/IRegex - (re/re-matches [_ text] (cre2_run_match pattern text))) + IRegex + (pixie.stdlib/re-matches [_ text] (cre2_run_match pattern text))) ;; add cre2 to registry -(defmethod re/re-engine :cre2 [_ regex-str opts] +(defmethod pixie.stdlib/re-engine 'pixie.re.cre2 [_ regex-str opts] (let [copts (cre2_make_opts opts)] (->CRE2Regex (cre2_new regex-str (count regex-str) copts) copts))) \ No newline at end of file diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi index 311df7fb..dd8d719a 100644 --- a/pixie/stdlib.pxi +++ b/pixie/stdlib.pxi @@ -3092,3 +3092,5 @@ ex: (vary-meta x assoc :foo 42)" :added "0.1"} [f x] (->Iterate f x)) + +(load-ns 'pixie.re) diff --git a/pixie/vm/reader.py b/pixie/vm/reader.py index 9f313de2..59f44c4d 100644 --- a/pixie/vm/reader.py +++ b/pixie/vm/reader.py @@ -648,7 +648,7 @@ def invoke(self, rdr, ch): except EOFError: break - return rt.cons(symbol(u"pixie.re/regex"), + return rt.cons(symbol(u"pixie.stdlib/re-pattern"), rt.cons(regex_str, rt.cons(regex_opts, nil))) dispatch_handlers = { From 4c61558bb451ac04b881868cdfa0aa838b5a91f0 Mon Sep 17 00:00:00 2001 From: keymone Date: Thu, 25 Feb 2016 10:45:13 +0100 Subject: [PATCH 24/32] use dashes for non-C names, fake matches sequence --- pixie/re/cre2.pxi | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git 
a/pixie/re/cre2.pxi b/pixie/re/cre2.pxi index 5ec39c74..b22c473b 100644 --- a/pixie/re/cre2.pxi +++ b/pixie/re/cre2.pxi @@ -43,7 +43,7 @@ (f/defcfn cre2_strings_to_ranges) ) -(def cre2_optmap +(def cre2-optmap { :ascii #(cre2_set_encoding % 2) :posix #(cre2_opt_set_posix_syntax % 1) :longest_match #(cre2_opt_set_longest_match % 1) @@ -54,20 +54,33 @@ :never_capture #(do %) ;; ?? :ignore_case #(cre2_opt_set_case_sensitive % 0) }) -(defn cre2_make_opts [opts] +(defn cre2-make-opts [opts] (let [opt (cre2_opt_new)] - (doseq [key opts] ((key cre2_optmap) opt)) + (doseq [key opts] ((key cre2-optmap) opt)) opt)) -(defn cre2_run_match +(defn cre2-make-match-array [size] + (cre2_string_t)) + +(defn cre2-delete-match-array [arr size] + ) + +(defn cre2-matches-to-seq [matches size] + (repeat size true)) + +(defn cre2-matches [pattern text] - (= 1 - (cre2_match pattern - text (count text) - 0 (count text) - 1 ;; anchor 1 - no, 2 - start, 3 - both - (cre2_string_t) - (+ 1 (cre2_num_capturing_groups pattern))))) + (let [text-size (count text) + match-arr-size (+ 1 (cre2_num_capturing_groups pattern)) + match-arr (cre2-make-match-array match-arr-size) + result (cre2_match pattern + text text-size 0 text-size 1 + match-arr match-arr-size)] + (if (= 1 result) + (let [match-seq (cre2-matches-to-seq match-arr match-arr-size)] + (cre2-delete-match-array match-arr match-arr-size) + match-seq) + nil))) (deftype CRE2Regex [pattern opts] IFinalize @@ -76,9 +89,9 @@ (cre2_delete pattern)) IRegex - (pixie.stdlib/re-matches [_ text] (cre2_run_match pattern text))) + (pixie.stdlib/re-matches [_ text] (cre2-matches pattern text))) ;; add cre2 to registry (defmethod pixie.stdlib/re-engine 'pixie.re.cre2 [_ regex-str opts] - (let [copts (cre2_make_opts opts)] - (->CRE2Regex (cre2_new regex-str (count regex-str) copts) copts))) \ No newline at end of file + (let [copts (cre2-make-opts opts)] + (->CRE2Regex (cre2_new regex-str (count regex-str) copts) copts))) From 
fa240257e3591ead7d1731900a2a9d9963907790 Mon Sep 17 00:00:00 2001 From: Matt Carroll Date: Thu, 11 Feb 2016 16:12:40 +0000 Subject: [PATCH 25/32] Added iterate function and tests --- pixie/stdlib.pxi | 8 ++++++++ tests/pixie/tests/test-stdlib.pxi | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi index 46f656a1..93abc01c 100644 --- a/pixie/stdlib.pxi +++ b/pixie/stdlib.pxi @@ -3072,3 +3072,11 @@ ex: (vary-meta x assoc :foo 42)" (swap! cache assoc argsv ret) ret) val))))) + +(defn iterate + {:doc "Returns a lazy sequence of x, (f x), (f (f x)) etc. f must be free of + side-effects" + :signatures [[f x]] + :added "0.1"} + [f x] + (lazy-seq (cons x (iterate f (f x))))) diff --git a/tests/pixie/tests/test-stdlib.pxi b/tests/pixie/tests/test-stdlib.pxi index 6dfd3efc..0868cac8 100644 --- a/tests/pixie/tests/test-stdlib.pxi +++ b/tests/pixie/tests/test-stdlib.pxi @@ -761,3 +761,7 @@ (t/deftest test-memoize (let [f (memoize rand)] (t/assert= (f) (f)))) + +(t/deftest test-iterate + (t/assert= (take 5 (iterate inc 5)) '(5 6 7 8 9)) + (t/assert= (str (type (iterate inc 1))) "")) From 6dce9e301e4d9273b1a54b9818098519d45781ef Mon Sep 17 00:00:00 2001 From: Matt Carroll Date: Fri, 12 Feb 2016 16:10:32 +0000 Subject: [PATCH 26/32] Added IReduce to iterate --- pixie/stdlib.pxi | 10 ++++++++++ tests/pixie/tests/test-stdlib.pxi | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi index 93abc01c..de683993 100644 --- a/pixie/stdlib.pxi +++ b/pixie/stdlib.pxi @@ -3073,6 +3073,16 @@ ex: (vary-meta x assoc :foo 42)" ret) val))))) +(deftype Iterate [f x] + IReduce + (-reduce [self f init] + (loop [acc (f (if (nil? init) + (first self) + init))] + (if (reduced? acc) + @acc + (recur (f acc)))))) + (defn iterate {:doc "Returns a lazy sequence of x, (f x), (f (f x)) etc. 
f must be free of side-effects" diff --git a/tests/pixie/tests/test-stdlib.pxi b/tests/pixie/tests/test-stdlib.pxi index 0868cac8..70adcd6e 100644 --- a/tests/pixie/tests/test-stdlib.pxi +++ b/tests/pixie/tests/test-stdlib.pxi @@ -764,4 +764,5 @@ (t/deftest test-iterate (t/assert= (take 5 (iterate inc 5)) '(5 6 7 8 9)) - (t/assert= (str (type (iterate inc 1))) "")) + (t/assert= (str (type (iterate inc 1))) "") + (t/assert= (reduce (fn [a v] (if (< a 10) (+ a v) (reduced a))) (iterate (partial + 2) 1)) 16)) From 8151bf85a16ca229b7229c871a225707e458feef Mon Sep 17 00:00:00 2001 From: keymone Date: Sat, 13 Feb 2016 11:11:15 +0100 Subject: [PATCH 27/32] find python using which -a --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 15f691e5..e27182ee 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ all: help EXTERNALS=externals -PYTHON ?= python2 +PYTHON ?= `env which -a python2 python2.7 | head -n1` PYTHONPATH=$$PYTHONPATH:$(EXTERNALS)/pypy From dd01268d0acdaaecc77d0676338778e79d29e741 Mon Sep 17 00:00:00 2001 From: Matt Carroll Date: Mon, 15 Feb 2016 16:27:04 +0000 Subject: [PATCH 28/32] Refactored iterate into type and added IReduce --- pixie/stdlib.pxi | 14 ++++++++------ tests/pixie/tests/test-stdlib.pxi | 5 +++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi index de683993..311df7fb 100644 --- a/pixie/stdlib.pxi +++ b/pixie/stdlib.pxi @@ -3075,13 +3075,15 @@ ex: (vary-meta x assoc :foo 42)" (deftype Iterate [f x] IReduce - (-reduce [self f init] - (loop [acc (f (if (nil? init) - (first self) - init))] + (-reduce [self rf init] + (loop [col (rest self) + acc (rf init (first self))] (if (reduced? acc) @acc - (recur (f acc)))))) + (recur (rest col) (rf acc (first col)))))) + ISeq + (-seq [self] + (cons x (lazy-seq* (fn [] (->Iterate f (f x))))))) (defn iterate {:doc "Returns a lazy sequence of x, (f x), (f (f x)) etc. 
f must be free of @@ -3089,4 +3091,4 @@ ex: (vary-meta x assoc :foo 42)" :signatures [[f x]] :added "0.1"} [f x] - (lazy-seq (cons x (iterate f (f x))))) + (->Iterate f x)) diff --git a/tests/pixie/tests/test-stdlib.pxi b/tests/pixie/tests/test-stdlib.pxi index 70adcd6e..12ad9912 100644 --- a/tests/pixie/tests/test-stdlib.pxi +++ b/tests/pixie/tests/test-stdlib.pxi @@ -764,5 +764,6 @@ (t/deftest test-iterate (t/assert= (take 5 (iterate inc 5)) '(5 6 7 8 9)) - (t/assert= (str (type (iterate inc 1))) "") - (t/assert= (reduce (fn [a v] (if (< a 10) (+ a v) (reduced a))) (iterate (partial + 2) 1)) 16)) + (t/assert= (reduce (fn [a v] (reduced "foo")) 0 (iterate inc 1)) "foo") + (t/assert= (reduce (fn [a v] (if (< a 10) (+ a v) (reduced a))) 0 (iterate (partial + 2) 1)) 16)) + From f6c402ed47bb9d1f855e233591ce0a2852154750 Mon Sep 17 00:00:00 2001 From: Matt Carroll Date: Thu, 18 Feb 2016 16:56:06 +0100 Subject: [PATCH 29/32] Added mkdtmp to stdlib --- pixie/stdlib.pxi | 1 + 1 file changed, 1 insertion(+) diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi index 311df7fb..8953592d 100644 --- a/pixie/stdlib.pxi +++ b/pixie/stdlib.pxi @@ -17,6 +17,7 @@ (def srand (ffi-fn libc "srand" [CInt] CInt)) (def fopen (ffi-fn libc "fopen" [CCharP CCharP] CVoidP)) (def fread (ffi-fn libc "fread" [CVoidP CInt CInt CVoidP] CInt)) +(def mkdtemp (ffi-fn libc "mkdtemp" [CCharP] CCharP)) (def libm (ffi-library (str "libm." 
pixie.platform/so-ext))) (def atan2 (ffi-fn libm "atan2" [CDouble CDouble] CDouble)) From f891c5cf206daeb6bb51bdb00e98b34dcd8336b7 Mon Sep 17 00:00:00 2001 From: Matt Carroll Date: Thu, 18 Feb 2016 17:08:30 +0100 Subject: [PATCH 30/32] Added rmdir to stdlib --- pixie/stdlib.pxi | 1 + 1 file changed, 1 insertion(+) diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi index 8953592d..02916e72 100644 --- a/pixie/stdlib.pxi +++ b/pixie/stdlib.pxi @@ -18,6 +18,7 @@ (def fopen (ffi-fn libc "fopen" [CCharP CCharP] CVoidP)) (def fread (ffi-fn libc "fread" [CVoidP CInt CInt CVoidP] CInt)) (def mkdtemp (ffi-fn libc "mkdtemp" [CCharP] CCharP)) +(def rmdir (ffi-fn libc "rmdir" [CCharP] CCharP)) (def libm (ffi-library (str "libm." pixie.platform/so-ext))) (def atan2 (ffi-fn libm "atan2" [CDouble CDouble] CDouble)) From 72a76a46b10b8d1d238fb3648101ea13d10cb79f Mon Sep 17 00:00:00 2001 From: Matt Carroll Date: Thu, 18 Feb 2016 17:26:15 +0100 Subject: [PATCH 31/32] Added mkdir and rm --- pixie/stdlib.pxi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi index 02916e72..8ccfe2df 100644 --- a/pixie/stdlib.pxi +++ b/pixie/stdlib.pxi @@ -18,7 +18,9 @@ (def fopen (ffi-fn libc "fopen" [CCharP CCharP] CVoidP)) (def fread (ffi-fn libc "fread" [CVoidP CInt CInt CVoidP] CInt)) (def mkdtemp (ffi-fn libc "mkdtemp" [CCharP] CCharP)) +(def mkdir (ffi-fn libc "mkdir" [CCharP] CCharP)) (def rmdir (ffi-fn libc "rmdir" [CCharP] CCharP)) +(def rm (ffi-fn libc "remove" [CCharP] CCharP)) (def libm (ffi-library (str "libm." 
pixie.platform/so-ext))) (def atan2 (ffi-fn libm "atan2" [CDouble CDouble] CDouble)) From 81c5788eee83d21eb4f6cfd1d234c3cd42d18694 Mon Sep 17 00:00:00 2001 From: Thomas Mulvaney Date: Fri, 19 Feb 2016 00:06:19 +0000 Subject: [PATCH 32/32] fixes the speed of iterates reduce the reduce implementation was creating some lazy lists --- pixie/stdlib.pxi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi index 8ccfe2df..3d9c2443 100644 --- a/pixie/stdlib.pxi +++ b/pixie/stdlib.pxi @@ -3080,11 +3080,11 @@ ex: (vary-meta x assoc :foo 42)" (deftype Iterate [f x] IReduce (-reduce [self rf init] - (loop [col (rest self) - acc (rf init (first self))] + (loop [next (f x) + acc (rf init x)] (if (reduced? acc) @acc - (recur (rest col) (rf acc (first col)))))) + (recur (f next) (rf acc next))))) ISeq (-seq [self] (cons x (lazy-seq* (fn [] (->Iterate f (f x)))))))