Send an explicit User-Agent header from fetch-url in the scrape tutorials.

Each fetch-url now opens the URLConnection itself, sets the User-Agent
request property from the new dynamic var *user-agent*, and hands the
connection's input stream to html/html-resource inside with-open.

The stream is obtained with .getInputStream rather than .getContent:
URLConnection.getContent returns an Object whose concrete type depends on the
response's content type, whereas with-open needs something it can close.

NOTE(review): leading whitespace on the context lines was reconstructed;
confirm against the files or apply with `git apply --ignore-whitespace`.
The post-image blob ids on the `index` lines predate the edits made to the
added lines and are kept only for reference.

diff --git a/src/tutorial/scrape1.clj b/src/tutorial/scrape1.clj
index c13f8d7..a144ee8 100644
--- a/src/tutorial/scrape1.clj
+++ b/src/tutorial/scrape1.clj
@@ -2,9 +2,19 @@
   (:require [net.cgrand.enlive-html :as html]))
 
 (def ^:dynamic *base-url* "https://news.ycombinator.com/")
+;; User-Agent header value sent by fetch-url; dynamic so it can be rebound.
+(def ^:dynamic *user-agent*
+  "Mozilla/5.0 (fake User-Agent)")
 
 (defn fetch-url [url]
-  (html/html-resource (java.net.URL. url)))
+  ;; Open the connection by hand so the User-Agent header can be set;
+  ;; with-open closes the stream once html-resource returns.
+  (with-open [inputstream (-> (java.net.URL. url)
+                              .openConnection
+                              (doto (.setRequestProperty "User-Agent"
+                                                         *user-agent*))
+                              .getInputStream)]
+    (html/html-resource inputstream)))
 
 (defn hn-headlines []
   (map html/text (html/select (fetch-url *base-url*) [:td.title :a])))
diff --git a/src/tutorial/scrape2.clj b/src/tutorial/scrape2.clj
index 6f0f726..f4b28b0 100644
--- a/src/tutorial/scrape2.clj
+++ b/src/tutorial/scrape2.clj
@@ -2,9 +2,19 @@
   (:require [net.cgrand.enlive-html :as html]))
 
 (def ^:dynamic *base-url* "https://news.ycombinator.com/")
+;; User-Agent header value sent by fetch-url; dynamic so it can be rebound.
+(def ^:dynamic *user-agent*
+  "Mozilla/5.0 (fake User-Agent)")
 
 (defn fetch-url [url]
-  (html/html-resource (java.net.URL. url)))
+  ;; Open the connection by hand so the User-Agent header can be set;
+  ;; with-open closes the stream once html-resource returns.
+  (with-open [inputstream (-> (java.net.URL. url)
+                              .openConnection
+                              (doto (.setRequestProperty "User-Agent"
+                                                         *user-agent*))
+                              .getInputStream)]
+    (html/html-resource inputstream)))
 
 (defn hn-headlines-and-points []
   (map html/text
diff --git a/src/tutorial/scrape3.clj b/src/tutorial/scrape3.clj
index c16346f..5fc2003 100644
--- a/src/tutorial/scrape3.clj
+++ b/src/tutorial/scrape3.clj
@@ -3,6 +3,9 @@
             [clojure.string :as str]))
 
 (def ^:dynamic *base-url* "http://nytimes.com/")
+;; User-Agent header value sent by fetch-url; dynamic so it can be rebound.
+(def ^:dynamic *user-agent*
+  "Mozilla/5.0 (fake User-Agent)")
 
 (def ^:dynamic *story-selector*
   [[:div.story
@@ -20,7 +23,14 @@
 (def ^:dynamic *summary-selector* [html/root :> :.summary])
 
 (defn fetch-url [url]
-  (html/html-resource (java.net.URL. url)))
+  ;; Open the connection by hand so the User-Agent header can be set;
+  ;; with-open closes the stream once html-resource returns.
+  (with-open [inputstream (-> (java.net.URL. url)
+                              .openConnection
+                              (doto (.setRequestProperty "User-Agent"
+                                                         *user-agent*))
+                              .getInputStream)]
+    (html/html-resource inputstream)))
 
 (defn stories []
   (html/select (fetch-url *base-url*) *story-selector*))