diff --git a/.gitignore b/.gitignore
index 70d7a1e1a..f647f11ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -185,4 +185,6 @@ fabric.properties
.ropeproject
README.html
.idea
-HOW_TO_RELEASE.txt
\ No newline at end of file
+HOW_TO_RELEASE.txt
+
+.vscode
\ No newline at end of file
diff --git a/howdoi/constants.py b/howdoi/constants.py
new file mode 100644
index 000000000..cb4dc91a5
--- /dev/null
+++ b/howdoi/constants.py
@@ -0,0 +1,48 @@
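+"""Shared constants for howdoi: SSL scheme, cache settings, search-engine URLs, user agents and output headers."""
+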
+import os
+import appdirs
+
+from howdoi.utils import u
+
+
+if os.getenv('HOWDOI_DISABLE_SSL'): # Set http instead of https
+ SCHEME = 'http://'
+ VERIFY_SSL_CERTIFICATE = False
+else:
+ SCHEME = 'https://'
+ VERIFY_SSL_CERTIFICATE = True
+
+CACHE_EMPTY_VAL = "NULL"
+
+CACHE_DIR = appdirs.user_cache_dir('howdoi')
+
+CACHE_ENTRY_MAX = 128
+
+SUPPORTED_SEARCH_ENGINES = ('google', 'bing', 'duckduckgo')
+
+SUPPORTED_HELP_QUERIES = ['use howdoi', 'howdoi', 'run howdoi',
+ 'do howdoi', 'howdoi howdoi', 'howdoi use howdoi']
+
+ANSWER_HEADER = u('{2} Answer from {0} {2}\n{1}')
+
+STAR_HEADER = u('\u2605')
+
+
+BLOCK_INDICATORS = (
+ 'form id="captcha-form"',
+ 'This page appears when Google automatically detects requests coming from your computer '
+ 'network which appear to be in violation of the Terms of Service'
+)
+
+SEARCH_URLS = {
+ 'bing': SCHEME + 'www.bing.com/search?q=site:{0}%20{1}&hl=en',
+ 'google': SCHEME + 'www.google.com/search?q=site:{0}%20{1}&hl=en',
+ 'duckduckgo': SCHEME + 'duckduckgo.com/?q=site:{0}%20{1}&t=hj&ia=web'
+}
+
+USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
+               'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0',
+               'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
+               ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
+                'Chrome/19.0.1084.46 Safari/536.5'),
+               ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46 '
+                'Safari/536.5'), )
diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py
index 235a52562..fe364f1f5 100755
--- a/howdoi/howdoi.py
+++ b/howdoi/howdoi.py
@@ -9,368 +9,41 @@
######################################################
from __future__ import print_function
-import gc
-gc.disable() # noqa: E402
+
import argparse
-import os
-import appdirs
-import re
-from cachelib import FileSystemCache, NullCache
+import gc
import json
-import requests
+import os
import sys
-from . import __version__
-
-from pygments import highlight
-from pygments.lexers import guess_lexer, get_lexer_by_name
-from pygments.formatters.terminal import TerminalFormatter
-from pygments.util import ClassNotFound
-
-from pyquery import PyQuery as pq
-from requests.exceptions import ConnectionError
-from requests.exceptions import SSLError
-
-# Handle imports for Python 2 and 3
-if sys.version < '3':
- import codecs
- from urllib import quote as url_quote
- from urllib import getproxies
- from urlparse import urlparse, parse_qs
-
- # Handling Unicode: http://stackoverflow.com/a/6633040/305414
- def u(x):
- return codecs.unicode_escape_decode(x)[0]
-else:
- from urllib.request import getproxies
- from urllib.parse import quote as url_quote, urlparse, parse_qs
-
- def u(x):
- return x
+import requests
+from cachelib import FileSystemCache, NullCache
+from requests.exceptions import ConnectionError, SSLError
-# rudimentary standardized 3-level log output
-def _print_err(x): print("[ERROR] " + x)
-
-
-_print_ok = print # noqa: E305
-def _print_dbg(x): print("[DEBUG] " + x) # noqa: E302
-
-
-if os.getenv('HOWDOI_DISABLE_SSL'): # Set http instead of https
- SCHEME = 'http://'
- VERIFY_SSL_CERTIFICATE = False
-else:
- SCHEME = 'https://'
- VERIFY_SSL_CERTIFICATE = True
-
-
-SUPPORTED_SEARCH_ENGINES = ('google', 'bing', 'duckduckgo')
-
-URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'
-
-USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
- 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0',
- 'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
- ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
- 'Chrome/19.0.1084.46 Safari/536.5'),
- ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46'
- 'Safari/536.5'), )
-SEARCH_URLS = {
- 'bing': SCHEME + 'www.bing.com/search?q=site:{0}%20{1}&hl=en',
- 'google': SCHEME + 'www.google.com/search?q=site:{0}%20{1}&hl=en',
- 'duckduckgo': SCHEME + 'duckduckgo.com/?q=site:{0}%20{1}&t=hj&ia=web'
-}
-
-BLOCK_INDICATORS = (
- 'form id="captcha-form"',
- 'This page appears when Google automatically detects requests coming from your computer '
- 'network which appear to be in violation of the Terms of Service'
-)
+from howdoi.constants import (CACHE_DIR, CACHE_ENTRY_MAX,
+ SUPPORTED_HELP_QUERIES, SUPPORTED_SEARCH_ENGINES)
+from howdoi.plugins import StackOverflowPlugin
+from howdoi.utils import _print_err, _print_ok
-BLOCKED_QUESTION_FRAGMENTS = (
- 'webcache.googleusercontent.com',
-)
+from . import __version__
-STAR_HEADER = u('\u2605')
-ANSWER_HEADER = u('{2} Answer from {0} {2}\n{1}')
-NO_ANSWER_MSG = '< no answer given >'
+gc.disable()
-CACHE_EMPTY_VAL = "NULL"
-CACHE_DIR = appdirs.user_cache_dir('howdoi')
-CACHE_ENTRY_MAX = 128
-SUPPORTED_HELP_QUERIES = ['use howdoi', 'howdoi', 'run howdoi',
- 'do howdoi', 'howdoi howdoi', 'howdoi use howdoi']
+howdoi_session = requests.session()
if os.getenv('HOWDOI_DISABLE_CACHE'):
cache = NullCache() # works like an always empty cache
else:
cache = FileSystemCache(CACHE_DIR, CACHE_ENTRY_MAX, default_timeout=0)
-howdoi_session = requests.session()
-
-
-class BlockError(RuntimeError):
- pass
-
-
-def _random_int(width):
- bres = os.urandom(width)
- if sys.version < '3':
- ires = int(bres.encode('hex'), 16)
- else:
- ires = int.from_bytes(bres, 'little')
-
- return ires
-
-
-def _random_choice(seq):
- return seq[_random_int(1) % len(seq)]
-
-
-def get_proxies():
- proxies = getproxies()
- filtered_proxies = {}
- for key, value in proxies.items():
- if key.startswith('http'):
- if not value.startswith('http'):
- filtered_proxies[key] = 'http://%s' % value
- else:
- filtered_proxies[key] = value
- return filtered_proxies
-
-
-def _get_result(url):
- try:
- return howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)},
- proxies=get_proxies(),
- verify=VERIFY_SSL_CERTIFICATE).text
- except requests.exceptions.SSLError as e:
- _print_err('Encountered an SSL Error. Try using HTTP instead of '
- 'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
- raise e
-
-
-def _add_links_to_text(element):
- hyperlinks = element.find('a')
-
- for hyperlink in hyperlinks:
- pquery_object = pq(hyperlink)
- href = hyperlink.attrib['href']
- copy = pquery_object.text()
- if (copy == href):
- replacement = copy
- else:
- replacement = "[{0}]({1})".format(copy, href)
- pquery_object.replace_with(replacement)
-
-
-def get_text(element):
- ''' return inner text in pyquery element '''
- _add_links_to_text(element)
- try:
- return element.text(squash_space=False)
- except TypeError:
- return element.text()
-
-
-def _extract_links_from_bing(html):
- html.remove_namespaces()
- return [a.attrib['href'] for a in html('.b_algo')('h2')('a')]
-
-
-def _extract_links_from_google(html):
- return [a.attrib['href'] for a in html('.l')] or \
- [a.attrib['href'] for a in html('.r')('a')]
-
-
-def _extract_links_from_duckduckgo(html):
- html.remove_namespaces()
- links_anchors = html.find('a.result__a')
- results = []
- for anchor in links_anchors:
- link = anchor.attrib['href']
- url_obj = urlparse(link)
- parsed_url = parse_qs(url_obj.query).get('uddg', '')
- if parsed_url:
- results.append(parsed_url[0])
- return results
-
-
-def _extract_links(html, search_engine):
- if search_engine == 'bing':
- return _extract_links_from_bing(html)
- if search_engine == 'duckduckgo':
- return _extract_links_from_duckduckgo(html)
- return _extract_links_from_google(html)
-
-
-def _get_search_url(search_engine):
- return SEARCH_URLS.get(search_engine, SEARCH_URLS['google'])
-
-
-def _is_blocked(page):
- for indicator in BLOCK_INDICATORS:
- if page.find(indicator) != -1:
- return True
-
- return False
-
-
-def _get_links(query):
- search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
- search_url = _get_search_url(search_engine)
-
- result = _get_result(search_url.format(URL, url_quote(query)))
- if _is_blocked(result):
- _print_err('Unable to find an answer because the search engine temporarily blocked the request. '
- 'Please wait a few minutes or select a different search engine.')
- raise BlockError("Temporary block by search engine")
-
- html = pq(result)
- return _extract_links(html, search_engine)
-
-
-def get_link_at_pos(links, position):
- if not links:
- return False
-
- if len(links) >= position:
- link = links[position - 1]
- else:
- link = links[-1]
- return link
-
-
-def _format_output(code, args):
- if not args['color']:
- return code
- lexer = None
-
- # try to find a lexer using the StackOverflow tags
- # or the query arguments
- for keyword in args['query'].split() + args['tags']:
- try:
- lexer = get_lexer_by_name(keyword)
- break
- except ClassNotFound:
- pass
-
- # no lexer found above, use the guesser
- if not lexer:
- try:
- lexer = guess_lexer(code)
- except ClassNotFound:
- return code
-
- return highlight(code,
- lexer,
- TerminalFormatter(bg='dark'))
-
-
-def _is_question(link):
- for fragment in BLOCKED_QUESTION_FRAGMENTS:
- if fragment in link:
- return False
- return re.search(r'questions/\d+/', link)
-
-
-def _get_questions(links):
- return [link for link in links if _is_question(link)]
-
-
-def _get_answer(args, links):
- link = get_link_at_pos(links, args['pos'])
- if not link:
- return False
-
- cache_key = link
- page = cache.get(link)
- if not page:
- page = _get_result(link + '?answertab=votes')
- cache.set(cache_key, page)
-
- html = pq(page)
-
- first_answer = html('.answer').eq(0)
-
- instructions = first_answer.find('pre') or first_answer.find('code')
- args['tags'] = [t.text for t in html('.post-tag')]
-
- if not instructions and not args['all']:
- text = get_text(first_answer.find('.post-text').eq(0))
- elif args['all']:
- texts = []
- for html_tag in first_answer.items('.post-text > *'):
- current_text = get_text(html_tag)
- if current_text:
- if html_tag[0].tag in ['pre', 'code']:
- texts.append(_format_output(current_text, args))
- else:
- texts.append(current_text)
- text = '\n'.join(texts)
- else:
- text = _format_output(get_text(instructions.eq(0)), args)
- if text is None:
- text = NO_ANSWER_MSG
- text = text.strip()
- return text
-
-
-def _get_links_with_cache(query):
- cache_key = query + "-links"
- res = cache.get(cache_key)
- if res:
- if res == CACHE_EMPTY_VAL:
- res = False
- return res
-
- links = _get_links(query)
- if not links:
- cache.set(cache_key, CACHE_EMPTY_VAL)
-
- question_links = _get_questions(links)
- cache.set(cache_key, question_links or CACHE_EMPTY_VAL)
-
- return question_links
-
def build_splitter(splitter_character='=', splitter_length=80):
return '\n' + splitter_character * splitter_length + '\n\n'
-def _get_answers(args):
- """
- @args: command-line arguments
- returns: array of answers and their respective metadata
- False if unable to get answers
- """
-
- question_links = _get_links_with_cache(args['query'])
- if not question_links:
- return False
-
- answers = []
- initial_position = args['pos']
- multiple_answers = (args['num_answers'] > 1 or args['all'])
-
- for answer_number in range(args['num_answers']):
- current_position = answer_number + initial_position
- args['pos'] = current_position
- link = get_link_at_pos(question_links, current_position)
- answer = _get_answer(args, question_links)
- if not answer:
- continue
- if not args['link'] and not args['json_output'] and multiple_answers:
- answer = ANSWER_HEADER.format(link, answer, STAR_HEADER)
- answer += '\n'
- answers.append({
- 'answer': answer,
- 'link': link,
- 'position': current_position
- })
-
- return answers
+def _get_cache_key(args):
+ return str(args) + __version__
def _clear_cache():
@@ -381,10 +54,6 @@ def _clear_cache():
return cache.clear()
-def _is_help_query(query: str):
- return any([query.lower() == help_query for help_query in SUPPORTED_HELP_QUERIES])
-
-
def _format_answers(res, args):
if "error" in res:
return res["error"]
@@ -393,16 +62,20 @@ def _format_answers(res, args):
return json.dumps(res)
formatted_answers = []
-
+
for answer in res:
next_ans = answer["answer"]
if args["link"]: # if we only want links
next_ans = answer["link"]
formatted_answers.append(next_ans)
-
+
return build_splitter().join(formatted_answers)
+def _is_help_query(query: str):
+ return any([query.lower() == help_query for help_query in SUPPORTED_HELP_QUERIES])
+
+
def _get_help_instructions():
instruction_splitter = build_splitter(' ', 60)
query = 'print hello world in python'
@@ -421,10 +94,6 @@ def _get_help_instructions():
return instruction_splitter.join(instructions)
-def _get_cache_key(args):
- return str(args) + __version__
-
-
def howdoi(raw_query):
args = raw_query
if type(raw_query) is str: # you can pass either a raw or a parsed query
@@ -443,7 +112,8 @@ def howdoi(raw_query):
return _format_answers(res, args)
try:
- res = _get_answers(args)
+ plugin = StackOverflowPlugin(cache=cache)
+ res = plugin.get_answers(args)
if not res:
res = {"error": "Sorry, couldn\'t find any help with that topic\n"}
cache.set(cache_key, res)
@@ -469,6 +139,8 @@ def get_parser():
action='store_true')
parser.add_argument('-e', '--engine', help='change search engine for this query only (google, bing, duckduckgo)',
dest='search_engine', nargs="?", default='google')
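+    # NOTE: only the StackOverflow plugin is implemented so far; howdoi() instantiates it directly.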
+ parser.add_argument('--plugin', help='query a specific plugin (default: stackoverflow)',
+ type=str, default='stackoverflow')
return parser
diff --git a/howdoi/plugins/__init__.py b/howdoi/plugins/__init__.py
new file mode 100644
index 000000000..678d85c40
--- /dev/null
+++ b/howdoi/plugins/__init__.py
@@ -0,0 +1,2 @@
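+"""howdoi plugin package: re-exports the built-in plugins for convenient imports."""
+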
+from howdoi.plugins.base import BasePlugin
+from howdoi.plugins.stackoverflow import StackOverflowPlugin
diff --git a/howdoi/plugins/base.py b/howdoi/plugins/base.py
new file mode 100644
index 000000000..4e224c747
--- /dev/null
+++ b/howdoi/plugins/base.py
@@ -0,0 +1,183 @@
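+"""Base class for howdoi answer plugins: shared proxy handling, HTTP session, caching and search-engine link extraction."""
+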
+import os
+import sys
+import requests
+
+from cachelib import FileSystemCache, NullCache
+
+from pyquery import PyQuery as pq
+from howdoi.utils import _print_err, _random_choice
+from howdoi.constants import (
+ VERIFY_SSL_CERTIFICATE, BLOCK_INDICATORS, STAR_HEADER,
+ ANSWER_HEADER, CACHE_ENTRY_MAX, CACHE_DIR, USER_AGENTS, SEARCH_URLS
+)
+
+
+# Handle imports for Python 2 and 3
+if sys.version < '3':
+ from urllib import quote as url_quote
+ from urllib import getproxies
+ from urlparse import urlparse, parse_qs
+else:
+ from urllib.request import getproxies
+ from urllib.parse import quote as url_quote, urlparse, parse_qs
+
+
+if os.getenv('HOWDOI_DISABLE_CACHE'):
+ cache = NullCache() # works like an always empty cache
+else:
+ cache = FileSystemCache(CACHE_DIR, CACHE_ENTRY_MAX, default_timeout=0)
+
+
+URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'
+
+
+class BlockError(RuntimeError):
+ pass
+
+
+howdoi_session = requests.session()
+
+
+class BasePlugin():
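+    """Common plumbing for answer plugins: proxy handling, HTTP fetching and
+    search-engine link extraction.
+
+    Subclasses must implement get_answer() and _get_links_with_cache();
+    callers then use get_answers(), e.g. StackOverflowPlugin(cache=cache).get_answers(args).
+    """
+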
+ def __init__(self, cache=None):
+ if cache is None:
+ cache = NullCache()
+ self.cache = cache
+
+ def get_proxies(self):
+ proxies = getproxies()
+ filtered_proxies = {}
+ for key, value in proxies.items():
+ if key.startswith('http'):
+ if not value.startswith('http'):
+ filtered_proxies[key] = 'http://%s' % value
+ else:
+ filtered_proxies[key] = value
+ return filtered_proxies
+
+ def _get_result(self, url):
+ try:
+ return howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)},
+ proxies=self.get_proxies(),
+ verify=VERIFY_SSL_CERTIFICATE).text
+ except requests.exceptions.SSLError as e:
+ _print_err('Encountered an SSL Error. Try using HTTP instead of '
+ 'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
+ raise e
+
+ def _get_links(self, query):
+ search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
+ search_url = self._get_search_url(search_engine)
+
+ result = self._get_result(search_url.format(URL, url_quote(query)))
+ if self._is_blocked(result):
+ _print_err('Unable to find an answer because the search engine temporarily blocked the request. '
+ 'Please wait a few minutes or select a different search engine.')
+ raise BlockError("Temporary block by search engine")
+
+ html = pq(result)
+ return self._extract_links(html, search_engine)
+
+ def _is_blocked(self, page):
+ for indicator in BLOCK_INDICATORS:
+ if page.find(indicator) != -1:
+ return True
+ return False
+
+ def _add_links_to_text(self, element):
+ hyperlinks = element.find('a')
+
+ for hyperlink in hyperlinks:
+ pquery_object = pq(hyperlink)
+ href = hyperlink.attrib['href']
+ copy = pquery_object.text()
+ if (copy == href):
+ replacement = copy
+ else:
+ replacement = "[{0}]({1})".format(copy, href)
+ pquery_object.replace_with(replacement)
+
+ def get_link_at_pos(self, links, position):
+ if not links:
+ return False
+ if len(links) >= position:
+ link = links[position - 1]
+ else:
+ link = links[-1]
+ return link
+
+ def get_text(self, element):
+ ''' return inner text in pyquery element '''
+ self._add_links_to_text(element)
+ try:
+ return element.text(squash_space=False)
+ except TypeError:
+ return element.text()
+
+ def _get_search_url(self, search_engine):
+ return SEARCH_URLS.get(search_engine, SEARCH_URLS['google'])
+
+ def _extract_links_from_bing(self, html):
+ html.remove_namespaces()
+ return [a.attrib['href'] for a in html('.b_algo')('h2')('a')]
+
+ def _extract_links_from_google(self, html):
+ return [a.attrib['href'] for a in html('.l')] or \
+ [a.attrib['href'] for a in html('.r')('a')]
+
+ def _extract_links_from_duckduckgo(self, html):
+ html.remove_namespaces()
+ links_anchors = html.find('a.result__a')
+ results = []
+ for anchor in links_anchors:
+ link = anchor.attrib['href']
+ url_obj = urlparse(link)
+ parsed_url = parse_qs(url_obj.query).get('uddg', '')
+ if parsed_url:
+ results.append(parsed_url[0])
+ return results
+
+ def _extract_links(self, html, search_engine):
+ if search_engine == 'bing':
+ return self._extract_links_from_bing(html)
+ if search_engine == 'duckduckgo':
+ return self._extract_links_from_duckduckgo(html)
+ return self._extract_links_from_google(html)
+
+ def get_answer(self, args, links):
+ raise NotImplementedError
+
+ def _get_links_with_cache(self, query):
+ raise NotImplementedError
+
+ def get_answers(self, args):
+ """
+ @args: command-line arguments
+ returns: array of answers and their respective metadata
+ False if unable to get answers
+ """
+ question_links = self._get_links_with_cache(args['query'])
+ if not question_links:
+ return False
+
+ answers = []
+ initial_position = args['pos']
+ multiple_answers = (args['num_answers'] > 1 or args['all'])
+
+ for answer_number in range(args['num_answers']):
+ current_position = answer_number + initial_position
+ args['pos'] = current_position
+ link = self.get_link_at_pos(question_links, current_position)
+ answer = self.get_answer(args, question_links)
+ if not answer:
+ continue
+ if not args['link'] and not args['json_output'] and multiple_answers:
+ answer = ANSWER_HEADER.format(link, answer, STAR_HEADER)
+ answer += '\n'
+ answers.append({
+ 'answer': answer,
+ 'link': link,
+ 'position': current_position
+ })
+
+ return answers
diff --git a/howdoi/plugins/stackoverflow.py b/howdoi/plugins/stackoverflow.py
new file mode 100644
index 000000000..cb11275f7
--- /dev/null
+++ b/howdoi/plugins/stackoverflow.py
@@ -0,0 +1,109 @@
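+"""Stack Overflow plugin for howdoi: fetches question pages and extracts the answers."""
+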
+import os
+import re
+
+from pygments import highlight
+from pygments.formatters.terminal import TerminalFormatter
+from pygments.lexers import get_lexer_by_name, guess_lexer
+from pygments.util import ClassNotFound
+
+from pyquery import PyQuery as pq
+from howdoi.constants import CACHE_EMPTY_VAL
+from howdoi.plugins import BasePlugin
+
+URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'
+
+NO_ANSWER_MSG = '< no answer given >'
+
+BLOCKED_QUESTION_FRAGMENTS = (
+    'webcache.googleusercontent.com',
+)
+
+
+class StackOverflowPlugin(BasePlugin):
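+    """Pulls the top-voted answer from a Stack Overflow question page and
+    syntax-highlights any code blocks with pygments."""
+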
+ def _is_question(self, link):
+ for fragment in BLOCKED_QUESTION_FRAGMENTS:
+ if fragment in link:
+ return False
+ return re.search(r'questions/\d+/', link)
+
+ def _get_questions(self, links):
+ return [link for link in links if self._is_question(link)]
+
+ def _format_output(self, code, args):
+ if not args['color']:
+ return code
+ lexer = None
+
+ # try to find a lexer using the StackOverflow tags
+ # or the query arguments
+ for keyword in args['query'].split() + args['tags']:
+ try:
+ lexer = get_lexer_by_name(keyword)
+ break
+ except ClassNotFound:
+ pass
+
+ # no lexer found above, use the guesser
+ if not lexer:
+ try:
+ lexer = guess_lexer(code)
+ except ClassNotFound:
+ return code
+
+ return highlight(code,
+ lexer,
+ TerminalFormatter(bg='dark'))
+
+ def _get_links_with_cache(self, query):
+ cache_key = query + "-links"
+ res = self.cache.get(cache_key)
+ if res:
+ if res == CACHE_EMPTY_VAL:
+ res = False
+ return res
+
+ links = self._get_links(query)
+ if not links:
+ self.cache.set(cache_key, CACHE_EMPTY_VAL)
+
+        question_links = self._get_questions(links)
+        self.cache.set(cache_key, question_links or CACHE_EMPTY_VAL)
+
+        return question_links
+
+ def get_answer(self, args, links):
+ link = self.get_link_at_pos(links, args['pos'])
+ if not link:
+ return False
+
+ cache_key = link
+ page = self.cache.get(link)
+ if not page:
+ page = self._get_result(link + '?answertab=votes')
+ self.cache.set(cache_key, page)
+
+ html = pq(page)
+
+ first_answer = html('.answer').eq(0)
+
+ instructions = first_answer.find('pre') or first_answer.find('code')
+ args['tags'] = [t.text for t in html('.post-tag')]
+
+ if not instructions and not args['all']:
+ text = self.get_text(first_answer.find('.post-text').eq(0))
+ elif args['all']:
+ texts = []
+ for html_tag in first_answer.items('.post-text > *'):
+ current_text = self.get_text(html_tag)
+ if current_text:
+ if html_tag[0].tag in ['pre', 'code']:
+ texts.append(self._format_output(current_text, args))
+ else:
+ texts.append(current_text)
+ text = '\n'.join(texts)
+ else:
+ text = self._format_output(self.get_text(instructions.eq(0)), args)
+ if text is None:
+ text = NO_ANSWER_MSG
+ text = text.strip()
+ return text
diff --git a/howdoi/utils.py b/howdoi/utils.py
new file mode 100644
index 000000000..a60682430
--- /dev/null
+++ b/howdoi/utils.py
@@ -0,0 +1,41 @@
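+"""Small helpers shared across howdoi modules: Python 2/3 text handling, log output and randomness."""
+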
+from __future__ import print_function
+
+import os
+import sys
+
+
+if sys.version < '3':
+ import codecs
+ # Handling Unicode: http://stackoverflow.com/a/6633040/305414
+
+ def u(x):
+ return codecs.unicode_escape_decode(x)[0]
+else:
+ def u(x):
+ return x
+
+
+# rudimentary standardized 3-level log output
+
+
+def _print_err(x):
+ print("[ERROR] " + x)
+
+
+_print_ok = print
+
+
+def _print_dbg(x):
+    print("[DEBUG] " + x)
+
+
+def _random_int(width):
+ bres = os.urandom(width)
+ if sys.version < '3':
+ ires = int(bres.encode('hex'), 16)
+ else:
+ ires = int.from_bytes(bres, 'little')
+
+ return ires
+
+
+def _random_choice(seq):
+ return seq[_random_int(1) % len(seq)]