diff --git a/.gitignore b/.gitignore
index 1d6fad3..34882de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,12 +1,17 @@
+output/
venv
chromedriver
.DS_Store
*.exe
*.xml
+*.xml.gz
*.log
.idea/
*.pyc
+# include test files
+!test/assets/data/*.xml
+
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
diff --git a/paperscraper/__init__.py b/paperscraper/__init__.py
index e69de29..6088c6c 100644
--- a/paperscraper/__init__.py
+++ b/paperscraper/__init__.py
@@ -0,0 +1,25 @@
+from importlib.metadata import version
+import logging
+from loguru import logger
+
+__version__ = version(__package__)
+
+
+class __InterceptHandler(logging.Handler):
+ def emit(self, record):
+ # Get corresponding Loguru level if it exists
+ try:
+ level = logger.level(record.levelname).name
+ except ValueError:
+ level = record.levelno
+
+ # Find caller from where originated the logged message
+ frame, depth = logging.currentframe(), 2
+ while frame.f_code.co_filename == logging.__file__:
+ frame = frame.f_back
+ depth += 1
+
+ logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())
+
+
+logging.basicConfig(handlers=[__InterceptHandler()], level=0)
diff --git a/paperscraper/_cli.py b/paperscraper/_cli.py
new file mode 100644
index 0000000..d6be844
--- /dev/null
+++ b/paperscraper/_cli.py
@@ -0,0 +1,53 @@
+import click
+from paperscraper._preprocess import (get_processed_db, get_extracted_data, get_processed_data)
+from paperscraper._postprocess import get_post_processed_data
+from paperscraper.config import config
+
+@click.group()
+def cli():
+ """Cli interface for paperscraper."""
+ pass
+
+
+@cli.group()
+def process():
+ """Process and setup database."""
+ pass
+
+
+@process.command()
+@click.option("-f", "--force", help="Force run all steps", is_flag=True)
+def process_db(force):
+ """Process the dblp xml file."""
+ get_processed_db(config=config, force=force)
+
+
+@process.command()
+@click.option("-f", "--force", help="Force run all steps", is_flag=True)
+def extract_data(force):
+ """Extract data from processed dblp xml file."""
+ get_extracted_data(config=config, force=force)
+
+
+@process.command()
+@click.option("-f", "--force", help="Force run all steps", is_flag=True)
+def process_data(force):
+ """Process extracted data."""
+ get_processed_data(config=config, force=force)
+
+
+@process.command()
+@click.option("-f", "--force", help="Force run all steps", is_flag=True)
+def post_process_data(force):
+ """Run cleanup process after processing data."""
+ get_post_processed_data(config=config, force=force)
+
+
+@process.command()
+@click.option("-f", "--force", help="Force run all steps", is_flag=True)
+def run_all(force):
+ """Run all steps in order."""
+ get_processed_db(config=config, force=force)
+ get_extracted_data(config=config, force=force)
+ get_processed_data(config=config, force=force)
+ get_post_processed_data(config=config, force=force)
diff --git a/paperscraper/_postprocess.py b/paperscraper/_postprocess.py
new file mode 100644
index 0000000..7f57abc
--- /dev/null
+++ b/paperscraper/_postprocess.py
@@ -0,0 +1,196 @@
+# External packages
+import ast
+import re
+import string
+import unicodedata
+
+from sqlitedict import SqliteDict
+from loguru import logger
+from tqdm import tqdm
+
+# Internal modules
+from paperscraper.config import config, Config
+
+regex = re.compile(r'[\n\r\t]')
+set_punctuations = set(string.punctuation)
+set_numbers = set("0123456789")
+
+logger.remove()
+logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
+
+
+def _clean_string(_string):
+ _string_normalized = unicodedata.normalize("NFKD", _string)
+ _string_stripped = str(regex.sub("", _string_normalized)).strip()
+ _string_recoded = _string_stripped.encode('ascii', 'ignore').decode('UTF-8')
+ return _string_recoded
+
+
+def process_title(title_string):
+ """Ensure that there aren't new lines and that the titles are between X and Y characters in length."""
+ try:
+ if not (5 < len(title_string) < 250):
+ return None
+
+ return " ".join(title_string.split())
+ except Exception:
+ # print(e)
+ return None
+
+
+def process_abstract(abstract_string):
+ """Ensure that there aren't new lines and that the abstracts are between X and Y characters in length."""
+ try:
+ if abstract_string in ["Not Scraped", "Error", "No Url"]:
+ return None
+
+ if not (50 < len(abstract_string) < 2500):
+ return None
+
+ return " ".join(abstract_string.split())
+ except Exception:
+ # print(e)
+ return None
+
+
+def process_authors(author_string):
+ """
+ Convert utf-8 characters to ascii so that they are searchable via a keyboard.
+
+ (will result in data loss but ignore errors)
+ """
+ try:
+ author_list = ast.literal_eval(author_string)
+ if isinstance(author_list, list):
+ recoded_author_list = [string.capwords(_author.encode('ascii', 'ignore').decode('UTF-8')) for _author in author_list]
+ return str(recoded_author_list)
+ except Exception:
+ # print(e)
+ pass
+ return author_string
+
+
+def process_citation_counts(citation_count_string):
+ """Ensure that this is always NONE or NUMERIC."""
+ try:
+ if not citation_count_string.isnumeric():
+ return None
+ else:
+ return citation_count_string
+ except Exception:
+ return None
+
+
+def process_keywords(keywords_string):
+ """
+ Convert utf-8 characters to ascii so that they are searchable via a keyboard.
+
+ (will result in data loss but ignore errors)
+ """
+ try:
+ keywords_list = ast.literal_eval(keywords_string)
+ if isinstance(keywords_list, list):
+ processed_keywords_list = list()
+
+ for _keyword in keywords_list:
+ if "→" in _keyword:
+ kws = _keyword.split("→")
+ for kw in kws:
+ processed_keywords_list.append(kw)
+ elif "Key words: " in _keyword:
+ _keyword = re.sub("Key words: ","",_keyword)
+ kws = _keyword.split(" – ")
+ for kw in kws:
+ processed_keywords_list.append(kw)
+ else:
+ processed_keywords_list.append(_keyword)
+
+ # Start with removing Nones.
+ processed_keywords_list = list(filter(None, processed_keywords_list))
+
+ # Make them all lower-case for case insensitive match to be successful.
+ processed_keywords_list = [str(kw).lower() for kw in processed_keywords_list]
+
+ # Clean the Keyword String
+ processed_keywords_list = [_clean_string(kw) for kw in processed_keywords_list]
+
+ # Remove weird phrases in the Keyword that sometimes happens based on how it's maintained on the Publisher's website.
+ _interim_processed_list = []
+ for kw in processed_keywords_list:
+ for pattern in config.keyword_patterns_to_remove:
+ kw = re.sub(pattern, "", kw)
+ _interim_processed_list.append(kw)
+ processed_keywords_list = _interim_processed_list
+
+ # Remove keywords if it has Only keywords or Only punctuations
+ processed_keywords_list = [i for i in processed_keywords_list if not all(j in set_punctuations or j in set_numbers for j in i)]
+
+ # Finally, Remove None's again.
+ processed_keywords_list = list(filter(None, processed_keywords_list))
+
+ # Merge Different Variations of the same Keyword
+ _interim_processed_list = []
+ for kw in processed_keywords_list:
+ if kw in config.keywords_to_merge:
+ _interim_processed_list.append(config.keywords_to_merge[kw])
+ else:
+ _interim_processed_list.append(kw)
+ processed_keywords_list = _interim_processed_list
+
+ # And of course, de-duplicate if some have both HCI and Human-Computer Interaction initially.
+ processed_keywords_list = list(set(processed_keywords_list))
+
+ # Let's capitalize the keywords so that they look nice.
+ processed_keywords_list = [string.capwords(kw) for kw in processed_keywords_list]
+
+ return str(processed_keywords_list)
+ except Exception:
+ # print(e)
+ pass
+ return None
+
+
+def get_post_processed_data(config: Config, force: bool = False) -> SqliteDict:
+ """Process fields and return them."""
+ if force or not config.path_postprocessing_output.exists():
+ # Read it
+ papers_db = SqliteDict(config.path_output)
+
+ author_processed = []
+ keywords_processed = []
+ citation_count_processed = []
+ abstract_processed = []
+ title_processed = []
+
+ for index, row in tqdm(papers_db.items(), desc="Papers", total=len(papers_db)):
+ # Process authors
+ author_processed.append(process_authors(row["author"]))
+
+ # Process keywords
+ keywords_processed.append(process_keywords(row["keywords"]))
+
+ # Process citation counts
+ citation_count_processed.append(process_citation_counts(row["citation_count"]))
+
+ # Process abstract
+ abstract_processed.append(process_abstract(row["abstract"]))
+
+ # Process titles
+ title_processed.append(process_title(row["title"]))
+
+ papers_db.close()
+
+ # Commit all the data to db
+ scraped_input_db = SqliteDict(config.path_postprocessing_output)
+ scraped_input_db["author_processed"] = author_processed
+ scraped_input_db["keywords_processed"] = keywords_processed
+ scraped_input_db["citation_count_processed"] = citation_count_processed
+ scraped_input_db["abstract_processed"] = abstract_processed
+ scraped_input_db["title_processed"] = title_processed
+
+ # Save POST-PROCESSED FILE
+ scraped_input_db.commit()
+ else:
+ scraped_input_db = SqliteDict(config.path_postprocessing_output)
+
+ return scraped_input_db
diff --git a/paperscraper/_preprocess.py b/paperscraper/_preprocess.py
new file mode 100644
index 0000000..7cbde0f
--- /dev/null
+++ b/paperscraper/_preprocess.py
@@ -0,0 +1,392 @@
+import sys
+import re
+import time
+from pathlib import Path
+
+import lxml.etree as ET
+import pandas as pd
+from bs4 import BeautifulSoup
+from loguru import logger
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service
+from sqlitedict import SqliteDict
+from tqdm import tqdm
+from webdriver_manager.chrome import ChromeDriverManager
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+
+from paperscraper.config import Config, config
+from paperscraper.scrapers.abstracts import get_abstract
+from paperscraper.scrapers.citations import get_citation_count
+from paperscraper.scrapers.keywords import get_keywords
+
+logger.remove()
+logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
+
+# List sources that are to be processed.
+# __publication_src = ["IEEE Visualization"]
+__publication_src = list(config.interesting_venues.keys())
+
+# Process only the below scraped STATES
+# Possible values: ["Not Scraped", "Error", "No Url"]
+__scraper_filter = {
+ "keywords": ["Not Scraped", "Error", "No Url"],
+ "abstract": ["Not Scraped", "Error", "No Url"],
+ "citation_count": ["Not Scraped", "Error", "No Url"],
+}
+
+
+def get_processed_db(config: Config = config, force: bool = False) -> Path:
+ """
+ Clean the raw file (set in config.path_input_raw) and writing it out to config.path_input.
+
+ Function is run only if config.path_input doesn't exist or if `force` is True.
+ """
+ if force or not config.path_input.exists():
+ logger.info(f"Cleaning data from {config.path_input_raw} into {config.path_input}")
+ # This Regular Find+Replace replaces instances of & between tags with a
+ # SPECIAL TAG `%26`. This tag will be replaced back to `&` in the code later on.
+ regex_find = r'(.*)&(.*)'
+ regex_replace = r'\1%26\2'
+
+ with open(config.path_input_raw, "r") as raw_dblp:
+ with open(config.path_input, "w") as processed_dblp:
+ for line in tqdm(raw_dblp, desc="Raw file line"):
+
+ # Iterations are needed because re.sub replaces just 1 instance at a time
+ intermediate_result = ""
+ while line != intermediate_result:
+ intermediate_result = line
+ line = re.sub(regex_find, regex_replace, line)
+
+ processed_dblp.write(line)
+
+ return config.path_input
+
+
+# TODO: Re-run this if
+# (1) The list has changed or
+# (2) There is a NEW DBLP snapshot.
+def get_extracted_data(config: Config, force: bool = False) -> tuple[SqliteDict, SqliteDict]:
+ """
+ FILTER the huge dblp_processed.xml file to keep just the data that we are interested in and Find Unique venues from the DBLP xml.
+
+ For unique venues looking ONLY for ["article","inproceedings","incollection"] and ["journal", "booktitle"].
+ """
+ if force or not config.path_output.exists():
+ logger.info(f"Extracting venues to {config.path_unique_venues}")
+ unique_sources = SqliteDict(config.path_unique_venues)
+ unique_sources.clear() # empty the db
+
+ logger.info(f"Extracting data to {config.path_output}")
+ result_list = SqliteDict(config.path_output)
+ result_list.clear() # empty the db
+ src_set = set()
+
+ _idx: dict[int, int] = {0: 0}
+
+ for event, elem in tqdm(ET.iterparse(config.path_input, encoding='UTF-8', events=("end", ) ,recover=True), desc="Entry"):
+ _idx[0] += 1
+
+ if elem.tag in ["article", "inproceedings", "incollection"]:
+ for child in elem.getchildren():
+ if child.tag in ["journal", "booktitle"]:
+ if child.text not in unique_sources:
+ child_dict = {}
+ child_dict["count"] = 0
+ child_dict["child_tag"] = child.tag
+ child_dict["elem_tag"] = elem.tag
+ else:
+ child_dict = unique_sources[child.text]
+
+ child_dict["count"] += 1
+ unique_sources[child.text] = child_dict
+
+ obj: dict = {}
+ # Initialize the fields that we are going to scrape.
+ # TODO: Update these if more fields are added.
+ obj["abstract"] = "Not Scraped"
+ obj["keywords"] = "Not Scraped"
+ obj["citation_count"] = "Not Scraped"
+ to_add = False
+ for child in elem.getchildren():
+ if child.tag not in obj:
+ if child.tag in ["author", "ee", "url"]:
+ obj[child.tag] = list()
+ else:
+ obj[child.tag] = None
+
+ if child.tag in ["author", "ee", "url"]:
+ if child.text is not None:
+ obj[child.tag].append(child.text.replace("%26", "&"))
+ else:
+ obj[child.tag].append(child.text)
+ else:
+ obj[child.tag] = child.text # title, year, pgs
+
+ # Only consider adding entries from the source defined above
+ if (child.text in config.interesting_venues and child.tag == config.interesting_venues[child.text]["sourcetype"]):
+ obj["source"] = child.text
+ to_add = True
+ if child.text not in src_set:
+ src_set.add(child.text)
+ logger.debug(f"Adding source: {child.text}")
+
+ if to_add:
+ result_list[_idx[0]] = obj
+
+ # Periodically committing stuff
+ if _idx[0] % 200000 == 0:
+ unique_sources.commit()
+ result_list.commit()
+
+ # from https://stackoverflow.com/questions/7171140/using-python-iterparse-for-large-xml-files
+ # http://lxml.de/parsing.html#modifying-the-tree
+ # Based on Liza Daly's fast_iter
+ # http://www.ibm.com/developerworks/xml/library/x-hiperfparse/
+ # See also http://effbot.org/zone/element-iterparse.htm
+ # NOTE: deleting only the 2nd level nodes
+ if len(elem.getroottree().getpath(elem).split("/")) <= 3:
+ elem.clear()
+ while elem.getprevious() is not None:
+ del elem.getparent()[0]
+
+ logger.debug("Writing to disk")
+ # Save to disk
+ unique_sources.commit()
+ result_list.commit()
+ else:
+ logger.info(f"Loading data from {config.path_output}")
+ result_list = SqliteDict(config.path_output)
+ logger.info(f"Loading data from {config.path_unique_venues}")
+ unique_sources = SqliteDict(config.path_unique_venues)
+
+ return result_list, unique_sources
+
+
+# get a new headless Chrome driver
+def _get_webdriver_instance():
+ chrome_options = Options()
+ chrome_options.add_argument("--headless")
+ chrome_desired_capabilities = DesiredCapabilities.CHROME
+ chrome_desired_capabilities['goog:loggingPrefs'] = { 'browser':'ALL' }
+ # chrome_options.binary_location = config.path_chromeoptions_binary
+ # driver = webdriver.chrome(executable_path=config.path_chromedriver, chrome_options=chrome_options)
+ driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),
+ chrome_options=chrome_options)
+ # driver.implicitly_wait(10000)
+ driver._old_get_method = driver.get
+ driver.get = lambda *args, **kwargs: get_browser_log_entries(driver, *args, **kwargs)
+ return driver
+
+
+def get_browser_log_entries(driver, *args, **kwargs):
+ """Get log entries from Selenium and add to the Python logger before returning."""
+ ret_val = driver._old_get_method(*args, **kwargs)
+ loglevels = {
+ 'NOTSET': 'TRACE' ,
+ 'DEBUG': 'DEBUG' ,
+ 'INFO': 'INFO' ,
+ 'WARNING':'WARNING',
+ 'ERROR': 'ERROR',
+ 'SEVERE':'ERROR',
+ 'CRITICAL':'CRITICAL'
+ }
+
+ #get browser logs
+ slurped_logs = driver.get_log('browser')
+ for entry in slurped_logs:
+ #convert browser log to python log format
+ rec = logger.log(loglevels.get(entry['level']), "{}: {}".format(entry['source'], entry['message']))
+ # rec.created = entry['timestamp'] /1000 # log using original timestamp.. us -> ms
+ # try:
+ # #add browser log to python log
+ # browserlog.handle(rec)
+ # except:
+ # print(entry)
+ #and return logs in case you want them
+ return ret_val
+
+
+def get_processed_data(config: Config, force: bool = False) -> SqliteDict:
+ """Scrape the Abstracts, Keywords, and Citations."""
+
+ logger.add(config.path_console_log_file)
+
+ if force or not config.path_output.exists():
+ # Get a webdriver instance (Headless Chrome)
+ logger.info(f"Processing data to {config.path_output}")
+ driver = _get_webdriver_instance()
+
+ # Read the base datafile
+ papers_db = SqliteDict(config.path_output)
+
+ # Initialize a log object to analyze the summary of a particular run.
+ log_obj: dict = {}
+
+ # Start scraping
+ for index, row in tqdm(papers_db.items(), desc="Papers", total=len(papers_db)):
+
+ # ToDo: Keep Checking this high-level filter to minimize iterations.
+ if (str(row["abstract"]) in __scraper_filter["abstract"] or
+ str(row["keywords"]) in __scraper_filter["keywords"] or
+ str(row["citation_count"]) in __scraper_filter["citation_count"]) \
+ and row["source"] in __publication_src:
+
+ if row["source"] not in log_obj:
+ log_obj[row["source"]] = dict()
+ log_obj[row["source"]]["papers"] = 0
+ log_obj[row["source"]]["abstract_parse_errors"] = 0
+ log_obj[row["source"]]["abstract_fetch_errors"] = 0
+ log_obj[row["source"]]["abstract_errors"] = 0
+ log_obj[row["source"]]["keyword_parse_errors"] = 0
+ log_obj[row["source"]]["keyword_fetch_errors"] = 0
+ log_obj[row["source"]]["keyword_errors"] = 0
+ log_obj[row["source"]]["no_of_citations_parse_errors"] = 0
+ log_obj[row["source"]]["no_of_citations_fetch_errors"] = 0
+ log_obj[row["source"]]["no_of_citations_errors"] = 0
+
+ logger.debug("Processing {} ".format(row["title"]))
+ # Increment no of papers
+ log_obj[row["source"]]["papers"] += 1
+
+ # Get the URLs
+ urls = []
+ try:
+ urls = row["ee"]
+ except Exception:
+ # If not ee, check url.
+ # But, this doesn't have HTTP/HTTPS it seems to be following some Relative Paths from a
+ # BaseURL that is unknown. Hence, it will fail 99% of the times.
+ try:
+ urls = row["url"]
+ except Exception:
+ pass
+
+ # If there is No url OR If the URL begins with a db/, continue.
+ if len(urls) == 0 or urls[0].startswith("db/"):
+ row['abstract'] = "No Url"
+ row['keywords'] = "No Url"
+ row['citation_count'] = "No Url"
+ papers_db[index] = row
+ logger.error(str(index) + " [No URL]: " + str(row["title"]))
+ continue
+
+ # ABSTRACT
+ abstract_soup = None
+ try:
+ driver.get(urls[0])
+
+ # Delay to ensure routings are complete, page renders
+ time.sleep(1.5)
+
+ # Initialize the Soup object
+ abstract_soup = BeautifulSoup(driver.page_source, 'lxml')
+
+ except Exception as e:
+ logger.error(f'{index} Abstract: ' + str(e))
+
+ if abstract_soup is not None:
+ is_abstract = False
+ for publisher in config.interesting_venues[row["source"]]["publishers"]:
+ abstract = get_abstract(publisher, abstract_soup)
+ if abstract is not None:
+ row['abstract'] = abstract
+ logger.info(str(index) + " [Success][Abstract] " + str(urls[0]) + " " + str(abstract)[:50])
+ is_abstract = True
+ break
+
+ if not is_abstract:
+ row['abstract'] = "Error"
+ logger.error(str(index) + " [Abstract Parse]: " + str(urls[0]) + " : " + str(row["source"]))
+ log_obj[row["source"]]["abstract_parse_errors"] += 1
+ log_obj[row["source"]]["abstract_errors"] += 1
+
+ else:
+ row['abstract'] = "Error"
+ logger.error(str(index) + " [Abstract URL Fetch]: " + str(row["source"]))
+ log_obj[row["source"]]["abstract_fetch_errors"] += 1
+ log_obj[row["source"]]["abstract_errors"] += 1
+
+ # No. of CITATIONS
+ citation_soup = abstract_soup
+ if citation_soup is not None:
+ is_citation = False
+ for publisher in config.interesting_venues[row["source"]]["publishers"]:
+ citation_count = get_citation_count(publisher, citation_soup)
+ if citation_count is not None:
+ row['citation_count'] = citation_count
+ logger.info(str(index) + " [Success][Citation Count] " + str(urls[0]) + " " + str(citation_count))
+ is_citation = True
+ break
+
+ if not is_citation:
+ row['citation_count'] = "Error"
+ logger.error(str(index) + " [Citation Parse]: " + str(urls[0]) + " : " + str(row["source"]))
+ log_obj[row["source"]]["no_of_citations_parse_errors"] += 1
+ log_obj[row["source"]]["no_of_citations_errors"] += 1
+
+ else:
+ row['citation_count'] = "Error"
+ logger.error(str(index) + " [Citation Count URL Fetch]: " + str(row["source"]))
+ log_obj[row["source"]]["no_of_citations_fetch_errors"] += 1
+ log_obj[row["source"]]["no_of_citations_errors"] += 1
+
+ # KEYWORDS
+ # Redirect to a different URL to fetch KEYWORDS in some cases.
+ is_keyword = False
+ current_url = driver.current_url
+ for publisher in config.interesting_venues[row["source"]]["publishers"]:
+ try:
+ if publisher == "ieee_explore":
+ driver.get(current_url + "/keywords#keywords")
+ elif publisher == "eurographics_digital_library":
+ driver.get(current_url + "?show=full")
+ else:
+ driver.get(current_url)
+
+ # Delay to ensure routings are complete, page renders
+ time.sleep(1.5)
+
+ # Initialize the Soup object
+ keyword_soup = BeautifulSoup(driver.page_source, 'lxml')
+
+ if keyword_soup is not None:
+ keywords_list = get_keywords(publisher, keyword_soup)
+ if keywords_list is not None:
+ row['keywords'] = keywords_list
+ logger.info(str(index) + " [Success][Keywords] " + str(urls[0]) + " " + str(keywords_list))
+ is_keyword = True
+ break
+ else:
+ row['keywords'] = "Error"
+ logger.error(str(index) + " [Keywords URL Fetch]: " + str(row["source"]))
+ log_obj[row["source"]]["keyword_fetch_errors"] += 1
+ log_obj[row["source"]]["keyword_errors"] += 1
+
+ except Exception as e:
+ logger.error(f'{index} Keywords: ' + str(e))
+
+ if not is_keyword:
+ row['keywords'] = "Error"
+ logger.error(str(index) + " [Error][Keywords Parse]: " + str(urls[0]) + " : " + str(row["source"]))
+ log_obj[row["source"]]["keyword_parse_errors"] += 1
+ log_obj[row["source"]]["keyword_errors"] += 1
+
+ papers_db[index] = row
+
+ papers_db.commit()
+
+ # Persist the paper file
+ papers_db.commit()
+ logger.info("scraped papers saved to disk.")
+
+ # Persist Logs
+ df_logs = pd.DataFrame.from_dict(log_obj, orient="index")
+ logger.info(log_obj)
+ df_logs.to_csv(config.path_logfile, sep='\t', header=True)
+ else:
+ logger.info(f"Loading processed data from {config.path_output}")
+ papers_db = SqliteDict(config.path_output)
+
+ return papers_db
diff --git a/paperscraper/config.py b/paperscraper/config.py
index 19ee3e0..64c04c4 100644
--- a/paperscraper/config.py
+++ b/paperscraper/config.py
@@ -1,286 +1,317 @@
-import os
+from datetime import datetime
+from pathlib import Path
+from typing import Union
-# ToDo: [Update as required] Paths to important input/output files
-path_input_raw = os.path.join("..", "assets", "data", "dblp-2020-11-01.xml")
-path_input = os.path.join("..", "assets", "data", "dblp_processed.xml")
-path_output = os.path.join("..", "output", "output.tsv")
-path_postprocessing_output = os.path.join("..", "output", "output_processed.tsv")
-path_unique_venues = os.path.join("..", "output", "unique_venues.tsv")
-path_unique_keywords = os.path.join("..", "output", "unique_keywords.tsv")
-path_unique_authors = os.path.join("..", "output", "unique_authors.tsv")
-path_logfile = os.path.join("..", "output", "log.tsv")
-# ChromeDriver
-# TODO Option 1: Manual Download from https://chromedriver.chromium.org/downloads (e.g., ChromeDriver 86.0.4240.22) and save to a known location in PATH
-# TODO Option 2: Install using brew: `brew cask install chromedriver`. It is generally saved to `/usr/local/bin/chromedriver`
-# For Mac OSX, the executable will have to be quarantined - `xattr -d com.apple.quarantine chromedriver`
-# Set the chromedriver path below.
-path_chromedriver = os.path.join("..", "assets", "chromedriver") # /usr/local/bin/chromedriver
+class Config:
+ """The main config object."""
+ def __init__(self, root_dir: Union[str, Path] = None,
+ assets_dir: Union[str, Path] = None,
+ output_dir: Union[str, Path] = None):
+ """Initialize the config."""
+ if root_dir is None:
+ _root_dir = Path(__file__).parent.parent
+ else:
+ _root_dir = Path(root_dir)
-# ChromeOptions binary
-# TODO: [Update this path depending on where it is located in your Operating System]
-path_chromeoptions_binary = os.path.join("/", "Applications", "Google Chrome.app", "Contents", "MacOS", "Google Chrome")
+ if assets_dir is None:
+ assets_dir = _root_dir / "assets"
+ elif not isinstance(assets_dir, Path):
+ assets_dir = Path(assets_dir)
-# List of Venues we target with their DBLP category. This information can be found in the path above.
-# TODO: [Update as required] Don't forget to add the corresponding logic to scrape keywords/absracts/titles/citations, etc.
-interesting_venues = {
- "ACM Trans. Comput. Hum. Interact.": {
- "sourcetype": "journal",
- "publishers": ["acm_digital_library"]
- },
- "AVI": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "BCS HCI": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library", "scienceopen", "springer_v2"]
- },
- "BCS HCI (1)": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "BCS HCI (2)": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "BELIV": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library", "ieee_explore"]
- },
- "BioVis": {
- "sourcetype": "booktitle",
- "publishers": ["ieee_explore"]
- },
- "CHI": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "Cognitive Biases in Visualizations": {
- "sourcetype": "booktitle",
- "publishers": ["springer_v2"]
- },
- "CogSci": {
- "sourcetype": "booktitle",
- "publishers": ["cogsci"]
- },
- "Comput. Graph. Forum": {
- "sourcetype": "journal",
- "publishers": ["wiley_online_library"]
- },
- "Conference on Designing Interactive Systems": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "Conference on Designing Interactive Systems (Companion Volume)": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "CSCW": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "Diagrams": {
- "sourcetype": "booktitle",
- "publishers": ["springer_v2"]
- },
- "Eurographics": {
- "sourcetype": "booktitle",
- "publishers": ["springer_v2", "eurographics_digital_library"]
- },
- "Eurographics (Areas Papers)": {
- "sourcetype": "booktitle",
- "publishers": ["eurographics_digital_library"]
- },
- "Eurographics (Posters)": {
- "sourcetype": "booktitle",
- "publishers": ["eurographics_digital_library"]
- },
- "Eurographics (Short Papers)": {
- "sourcetype": "booktitle",
- "publishers": ["eurographics_digital_library"]
- },
- "Eurographics (Short Presentations)": {
- "sourcetype": "booktitle",
- "publishers": ["eurographics_digital_library" ]
- },
- "Eurographics (State of the Art Reports)": {
- "sourcetype": "booktitle",
- "publishers": ["eurographics_digital_library" ]
- },
- "EuroVAST@EuroVis": {
- "sourcetype": "booktitle",
- "publishers": ["eurographics_digital_library"]
- },
- "Graphics Interface": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library", "graphics_interface_proceedings"]
- },
- "ICDM": {
- "sourcetype": "booktitle",
- "publishers": ["springer_v2", "ieee_explore"]
- },
- "IEEE Computer Graphics and Applications": {
- "sourcetype": "journal",
- "publishers": ["ieee_explore"]
- },
- "IEEE Trans. Vis. Comput. Graph.": {
- "sourcetype": "journal",
- "publishers": ["ieee_explore"]
- },
- "IEEE VAST": {
- "sourcetype": "booktitle",
- "publishers": ["ieee_explore"]
- },
- "IEEE Visualization": {
- "sourcetype": "booktitle",
- "publishers": ["ieee_explore"]
- },
- "IEEE VIS (Short Papers)": {
- "sourcetype": "booktitle",
- "publishers": ["ieee_explore"]
- },
- "Information Visualization": {
- "sourcetype": "booktitle",
- "publishers": ["springer_v2", "dagstuhl"]
- },
- "INTERACT": {
- "sourcetype": "booktitle",
- "publishers": ["springer_v2"]
- },
- "INTERACT (1)": {
- "sourcetype": "booktitle",
- "publishers": ["springer_v2"]
- },
- "INTERACT (2)": {
- "sourcetype": "booktitle",
- "publishers": ["springer_v2"]
- },
- "INTERACT (3)": {
- "sourcetype": "booktitle",
- "publishers": ["springer_v2"]
- },
- "INTERACT (4)": {
- "sourcetype": "booktitle",
- "publishers": ["springer_v2"]
- },
- "International Conference on Supercomputing": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "IUI": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "IV": {
- "sourcetype": "booktitle",
- "publishers": ["ieee_explore"]
- },
- "IV (1)": {
- "sourcetype": "booktitle",
- "publishers": ["ieee_explore"]
- },
- "IV (2)": {
- "sourcetype": "booktitle",
- "publishers": ["ieee_explore"]
- },
- "IVAPP": {
- "sourcetype": "booktitle",
- "publishers": ["scitepress"]
- },
- "J. Vis.": {
- "sourcetype": "journal",
- "publishers": ["springer_v1"]
- },
- "KDD": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library", "aaai"]
- },
- "PacificVis": {
- "sourcetype": "booktitle",
- "publishers": ["ieee_explore"]
- },
- "SciVis": {
- "sourcetype": "booktitle",
- "publishers": ["ieee_explore"]
- },
- "SIBGRAPI": {
- "sourcetype": "booktitle",
- "publishers": ["ieee_explore"]
- },
- "SIGGRAPH": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "SIGGRAPH Asia": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "SIGMOD Conference": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "UbiComp": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library", "springer_v2"]
- },
- "UIST": {
- "sourcetype": "booktitle",
- "publishers": ["acm_digital_library"]
- },
- "VAST": {
- "sourcetype": "booktitle",
- "publishers": ["eurographics_digital_library", "ieee_explore" ]
- },
- "VAST (Short and Project Papers)": {
- "sourcetype": "booktitle",
- "publishers": ["eurographics_digital_library", "ieee_explore" ]
- },
- "VCBM": {
- "sourcetype": "booktitle",
- "publishers": ["eurographics_digital_library"]
- },
- "Vis. Comput.": {
- "sourcetype": "journal",
- "publishers": ["springer_v1"]
- },
- "VMV": {
- "sourcetype": "booktitle",
- "publishers": ["eurographics_digital_library"]
- }
-}
+ if output_dir is None:
+ output_dir = _root_dir / "output"
+ elif not isinstance(output_dir, Path):
+ output_dir = Path(output_dir)
-# Object to map different variations of a keyword to a consistent name.
-keywords_to_merge = {
- "cscw": "computer supported collaborative work",
- "computer supported collaborative work": "computer supported collaborative work",
- "data visualization": "data visualization",
- "data visualisation": "data visualization",
- "visualisation": "visualization",
- "visualization": "visualization",
- "hci": "human computer interaction",
- "human computer interaction": "human computer interaction",
- "human-computer-interaction": "human computer interaction",
- "human-computer interaction": "human computer interaction",
- "human computer interaction (hci)": "human computer interaction",
- "human-computer interaction (hci)": "human computer interaction",
- "human computer interactions": "human computer interaction",
- "human-computer-interactions": "human computer interaction",
- "human-computer interactions": "human computer interaction",
-}
+ # TODO: [Update as required] Paths to important input/output files
+ # FIXME: automatically extract the latest
+ self.path_input_raw = assets_dir / "data" / "dblp-2022-11-02.xml"
+ self.path_input = assets_dir / "data" / "dblp_processed.xml"
+ self.path_output = output_dir / "output.db"
+ self.path_postprocessing_output = output_dir / "output_processed.tsv"
+ self.path_unique_venues = output_dir / "unique_venues.db"
+ self.path_unique_keywords = output_dir / "unique_keywords.tsv"
+ self.path_unique_authors = output_dir/ "unique_authors.tsv"
-keyword_patterns_to_remove = [
- r"\d+.\d+.\d+.", # e.g., 1.3.4.
- r"\d+.\d+.\d+", # e.g., 1.3.4
- r"\w+.\d+.\d+.", # e.g., d.3.4.
- r"\w+.\d+.\d+", # e.g., d.3.4
- r"according to",
- r"acm ccs",
- r"acmccs",
- r"acma ccs",
- r"\(\s*\)",
- r"\/spl",
- r"\/sup",
- r"\/",
- r"^-\s*"
-]
+ datetime_str = f"{datetime.now():%Y-%m-%d_%H-%M-%S%z}"
+ self.path_logfile = output_dir / f"log-{datetime_str}.tsv"
+ self.path_console_log_file = output_dir / f"console-{datetime_str}.log"
+
+ # ChromeDriver
+ # TODO Option 1: Manual Download from https://chromedriver.chromium.org/downloads (e.g., ChromeDriver 86.0.4240.22) and save to a known location in PATH
+ # TODO Option 2: Install using brew: `brew cask install chromedriver`. It is generally saved to `/usr/local/bin/chromedriver`
+ # For Mac OSX, the executable will have to be quarantined - `xattr -d com.apple.quarantine chromedriver`
+ # Set the chromedriver path below.
+ self.path_chromedriver = assets_dir / "chromedriver" # /usr/local/bin/chromedriver
+
+ # ChromeOptions binary
+ # TODO: [Update this path depending on where it is located in your Operating System]
+ self.path_chromeoptions_binary = Path("/") / "Applications" / "Google Chrome.app" / "Contents" / "MacOS" / "Google Chrome"
+
+ # List of Venues we target with their DBLP category. This information can be found in the path above.
+ # TODO: [Update as required] Don't forget to add the corresponding logic to scrape keywords/abstracts/titles/citations, etc.
+ self.interesting_venues = {
+ "ACM Trans. Comput. Hum. Interact.": {
+ "sourcetype": "journal",
+ "publishers": ["acm_digital_library"]
+ },
+ "AVI": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "BCS HCI": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library", "scienceopen", "springer_v2"]
+ },
+ "BCS HCI (1)": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "BCS HCI (2)": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "BELIV": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library", "ieee_explore"]
+ },
+ "BioVis": {
+ "sourcetype": "booktitle",
+ "publishers": ["ieee_explore"]
+ },
+ "CHI": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "Cognitive Biases in Visualizations": {
+ "sourcetype": "booktitle",
+ "publishers": ["springer_v2"]
+ },
+ "CogSci": {
+ "sourcetype": "booktitle",
+ "publishers": ["cogsci"]
+ },
+ "Comput. Graph. Forum": {
+ "sourcetype": "journal",
+ "publishers": ["wiley_online_library"]
+ },
+ "Conference on Designing Interactive Systems": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "Conference on Designing Interactive Systems (Companion Volume)": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "CSCW": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "Diagrams": {
+ "sourcetype": "booktitle",
+ "publishers": ["springer_v2"]
+ },
+ "Eurographics": {
+ "sourcetype": "booktitle",
+ "publishers": ["springer_v2", "eurographics_digital_library"]
+ },
+ "Eurographics (Areas Papers)": {
+ "sourcetype": "booktitle",
+ "publishers": ["eurographics_digital_library"]
+ },
+ "Eurographics (Posters)": {
+ "sourcetype": "booktitle",
+ "publishers": ["eurographics_digital_library"]
+ },
+ "Eurographics (Short Papers)": {
+ "sourcetype": "booktitle",
+ "publishers": ["eurographics_digital_library"]
+ },
+ "Eurographics (Short Presentations)": {
+ "sourcetype": "booktitle",
+ "publishers": ["eurographics_digital_library" ]
+ },
+ "Eurographics (State of the Art Reports)": {
+ "sourcetype": "booktitle",
+ "publishers": ["eurographics_digital_library" ]
+ },
+ "EuroVAST@EuroVis": {
+ "sourcetype": "booktitle",
+ "publishers": ["eurographics_digital_library"]
+ },
+ "Graphics Interface": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library", "graphics_interface_proceedings"]
+ },
+ "ICDM": {
+ "sourcetype": "booktitle",
+ "publishers": ["springer_v2", "ieee_explore"]
+ },
+ "IEEE Computer Graphics and Applications": {
+ "sourcetype": "journal",
+ "publishers": ["ieee_explore"]
+ },
+ "IEEE Trans. Vis. Comput. Graph.": {
+ "sourcetype": "journal",
+ "publishers": ["ieee_explore"]
+ },
+ "IEEE VAST": {
+ "sourcetype": "booktitle",
+ "publishers": ["ieee_explore"]
+ },
+ "IEEE Visualization": {
+ "sourcetype": "booktitle",
+ "publishers": ["ieee_explore"]
+ },
+ "IEEE VIS (Short Papers)": {
+ "sourcetype": "booktitle",
+ "publishers": ["ieee_explore"]
+ },
+ "Information Visualization": {
+ "sourcetype": "booktitle",
+ "publishers": ["springer_v2", "dagstuhl"]
+ },
+ "INTERACT": {
+ "sourcetype": "booktitle",
+ "publishers": ["springer_v2"]
+ },
+ "INTERACT (1)": {
+ "sourcetype": "booktitle",
+ "publishers": ["springer_v2"]
+ },
+ "INTERACT (2)": {
+ "sourcetype": "booktitle",
+ "publishers": ["springer_v2"]
+ },
+ "INTERACT (3)": {
+ "sourcetype": "booktitle",
+ "publishers": ["springer_v2"]
+ },
+ "INTERACT (4)": {
+ "sourcetype": "booktitle",
+ "publishers": ["springer_v2"]
+ },
+ "International Conference on Supercomputing": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "IUI": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "IV": {
+ "sourcetype": "booktitle",
+ "publishers": ["ieee_explore"]
+ },
+ "IV (1)": {
+ "sourcetype": "booktitle",
+ "publishers": ["ieee_explore"]
+ },
+ "IV (2)": {
+ "sourcetype": "booktitle",
+ "publishers": ["ieee_explore"]
+ },
+ "IVAPP": {
+ "sourcetype": "booktitle",
+ "publishers": ["scitepress"]
+ },
+ "J. Vis.": {
+ "sourcetype": "journal",
+ "publishers": ["springer_v1"]
+ },
+ "KDD": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library", "aaai"]
+ },
+ "PacificVis": {
+ "sourcetype": "booktitle",
+ "publishers": ["ieee_explore"]
+ },
+ "SciVis": {
+ "sourcetype": "booktitle",
+ "publishers": ["ieee_explore"]
+ },
+ "SIBGRAPI": {
+ "sourcetype": "booktitle",
+ "publishers": ["ieee_explore"]
+ },
+ "SIGGRAPH": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "SIGGRAPH Asia": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "SIGMOD Conference": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "UbiComp": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library", "springer_v2"]
+ },
+ "UIST": {
+ "sourcetype": "booktitle",
+ "publishers": ["acm_digital_library"]
+ },
+ "VAST": {
+ "sourcetype": "booktitle",
+ "publishers": ["eurographics_digital_library", "ieee_explore" ]
+ },
+ "VAST (Short and Project Papers)": {
+ "sourcetype": "booktitle",
+ "publishers": ["eurographics_digital_library", "ieee_explore" ]
+ },
+ "VCBM": {
+ "sourcetype": "booktitle",
+ "publishers": ["eurographics_digital_library"]
+ },
+ "Vis. Comput.": {
+ "sourcetype": "journal",
+ "publishers": ["springer_v1"]
+ },
+ "VMV": {
+ "sourcetype": "booktitle",
+ "publishers": ["eurographics_digital_library"]
+ }
+ }
+
+ # Object to map different variations of a keyword to a consistent name.
+ self.keywords_to_merge = {
+ "cscw": "computer supported collaborative work",
+ "computer supported collaborative work": "computer supported collaborative work",
+ "data visualization": "data visualization",
+ "data visualisation": "data visualization",
+ "visualisation": "visualization",
+ "visualization": "visualization",
+ "hci": "human computer interaction",
+ "human computer interaction": "human computer interaction",
+ "human-computer-interaction": "human computer interaction",
+ "human-computer interaction": "human computer interaction",
+ "human computer interaction (hci)": "human computer interaction",
+ "human-computer interaction (hci)": "human computer interaction",
+ "human computer interactions": "human computer interaction",
+ "human-computer-interactions": "human computer interaction",
+ "human-computer interactions": "human computer interaction",
+ }
+
+ self.keyword_patterns_to_remove = [
+ r"\d+.\d+.\d+.", # e.g., 1.3.4.
+ r"\d+.\d+.\d+", # e.g., 1.3.4
+ r"\w+.\d+.\d+.", # e.g., d.3.4.
+ r"\w+.\d+.\d+", # e.g., d.3.4
+ r"according to",
+ r"acm ccs",
+ r"acmccs",
+ r"acma ccs",
+ r"\(\s*\)",
+ r"\/spl",
+ r"\/sup",
+ r"\/",
+ r"^-\s*"
+ ]
+
+
+config = Config()
diff --git a/paperscraper/scrapers/keywords.py b/paperscraper/scrapers/keywords.py
index e6e9ffb..85cc14b 100644
--- a/paperscraper/scrapers/keywords.py
+++ b/paperscraper/scrapers/keywords.py
@@ -1,10 +1,13 @@
import re
+from loguru import logger
regex = re.compile(r'[\n\r\t]')
def acm_digital_library(soup):
try:
+ # TODO: Get keywords by clicking on the citation link (soup.select('a[data-title="Export Citation"]'))
+ # Then using the ActionChains from selenium to click, parse the bib result and get keywords
keywords = set()
keywords_parent_ol = soup.find('ol', class_="rlist organizational-chart")
keywords_divs = keywords_parent_ol.findChildren('div', recursive=True)
@@ -13,12 +16,12 @@ def acm_digital_library(soup):
keywords.add(regex.sub("", kw.split(",")[0]))
return list(keywords)
except Exception as e:
- print(e)
- return None
+ logger.error(e)
+ return []
def graphics_interface_proceedings(soup):
- return None
+ return []
def ieee_explore(soup):
@@ -36,8 +39,8 @@ def ieee_explore(soup):
keywords.add(str(regex.sub("", str(keywords_l.text).split(",")[0])))
return list(keywords)
except Exception as e:
- print(e)
- return None
+ logger.error(e)
+ return []
def eurographics_digital_library(soup):
@@ -65,8 +68,8 @@ def eurographics_digital_library(soup):
keywords_set.update(re.split(',|:|;', keywords_str))
return list(keywords_set)
except Exception as e:
- print(e)
- return None
+ logger.error(e)
+ return []
def springer_v2(soup):
@@ -78,8 +81,8 @@ def springer_v2(soup):
keywords.add(k.text)
return list(keywords)
except Exception as e:
- print(e)
- return None
+ logger.error(e)
+ return []
def dagstuhl(soup):
@@ -91,8 +94,8 @@ def dagstuhl(soup):
if keywords_font is not None:
return re.split(',', keywords_font.text)
except Exception as e:
- print(e)
- return None
+ logger.error(e)
+ return []
def springer_v1(soup):
@@ -105,8 +108,8 @@ def springer_v1(soup):
keywords.add(str(regex.sub("", kw)).strip())
return list(keywords)
except Exception as e:
- print(e)
- return None
+ logger.error(e)
+ return []
def wiley_online_library(soup):
@@ -138,12 +141,12 @@ def wiley_online_library(soup):
return list(keywords_set)
except Exception as e:
- print(e)
- return None
+ logger.error(e)
+ return []
def cogsci(soup):
- return None
+ return []
def scitepress(soup):
@@ -154,8 +157,8 @@ def scitepress(soup):
keywords_set.add(kw)
return list(keywords_set)
except Exception as e:
- print(e)
- return None
+ logger.error(e)
+ return []
def scienceopen(soup):
@@ -168,11 +171,11 @@ def scienceopen(soup):
return list(keywords_set)
except Exception as e:
pass
- return None
+ return []
def aaai(soup):
- return None
+ return []
def get_keywords(publisher, soup):
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 0000000..3c92596
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,914 @@
+[[package]]
+name = "async-generator"
+version = "1.10"
+description = "Async generators and context managers for Python 3.5+"
+category = "main"
+optional = false
+python-versions = ">=3.5"
+
+[[package]]
+name = "attrs"
+version = "21.4.0"
+description = "Classes Without Boilerplate"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[package.extras]
+dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "sphinx", "sphinx-notfound-page", "zope.interface"]
+docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"]
+tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "zope.interface"]
+tests_no_zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six"]
+
+[[package]]
+name = "beautifulsoup4"
+version = "4.10.0"
+description = "Screen-scraping library"
+category = "main"
+optional = false
+python-versions = ">3.0.0"
+
+[package.dependencies]
+soupsieve = ">1.2"
+
+[package.extras]
+html5lib = ["html5lib"]
+lxml = ["lxml"]
+
+[[package]]
+name = "certifi"
+version = "2021.10.8"
+description = "Python package for providing Mozilla's CA Bundle."
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "cffi"
+version = "1.15.0"
+description = "Foreign Function Interface for Python calling C code."
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+pycparser = "*"
+
+[[package]]
+name = "charset-normalizer"
+version = "2.0.12"
+description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
+category = "main"
+optional = false
+python-versions = ">=3.5.0"
+
+[package.extras]
+unicode_backport = ["unicodedata2"]
+
+[[package]]
+name = "click"
+version = "8.0.4"
+description = "Composable command line interface toolkit"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[[package]]
+name = "colorama"
+version = "0.4.4"
+description = "Cross-platform colored terminal text."
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[[package]]
+name = "cryptography"
+version = "36.0.2"
+description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+cffi = ">=1.12"
+
+[package.extras]
+docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx_rtd_theme"]
+docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"]
+pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"]
+sdist = ["setuptools_rust (>=0.11.4)"]
+ssh = ["bcrypt (>=3.1.5)"]
+test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0)", "pytest-cov", "pytest-subtests", "pytest-xdist", "pytz"]
+
+[[package]]
+name = "debugpy"
+version = "1.6.3"
+description = "An implementation of the Debug Adapter Protocol for Python"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[[package]]
+name = "exceptiongroup"
+version = "1.0.4"
+description = "Backport of PEP 654 (exception groups)"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.extras]
+test = ["pytest (>=6)"]
+
+[[package]]
+name = "h11"
+version = "0.13.0"
+description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[[package]]
+name = "idna"
+version = "3.3"
+description = "Internationalized Domain Names in Applications (IDNA)"
+category = "main"
+optional = false
+python-versions = ">=3.5"
+
+[[package]]
+name = "iniconfig"
+version = "1.1.1"
+description = "iniconfig: brain-dead simple config-ini parsing"
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "loguru"
+version = "0.6.0"
+description = "Python logging made (stupidly) simple"
+category = "main"
+optional = false
+python-versions = ">=3.5"
+
+[package.dependencies]
+colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""}
+win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}
+
+[package.extras]
+dev = ["Sphinx (>=4.1.1)", "black (>=19.10b0)", "colorama (>=0.3.4)", "docutils (==0.16)", "flake8 (>=3.7.7)", "isort (>=5.1.1)", "pytest (>=4.6.2)", "pytest-cov (>=2.7.1)", "sphinx-autobuild (>=0.7.1)", "sphinx-rtd-theme (>=0.4.3)", "tox (>=3.9.0)"]
+
+[[package]]
+name = "lxml"
+version = "4.8.0"
+description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"
+
+[package.extras]
+cssselect = ["cssselect (>=0.7)"]
+html5 = ["html5lib"]
+htmlsoup = ["BeautifulSoup4"]
+source = ["Cython (>=0.29.7)"]
+
+[[package]]
+name = "memory-profiler"
+version = "0.61.0"
+description = "A module for monitoring memory usage of a python program"
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+
+[package.dependencies]
+psutil = "*"
+
+[[package]]
+name = "numpy"
+version = "1.22.3"
+description = "NumPy is the fundamental package for array computing with Python."
+category = "main"
+optional = false
+python-versions = ">=3.8"
+
+[[package]]
+name = "outcome"
+version = "1.1.0"
+description = "Capture the outcome of Python function calls."
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+attrs = ">=19.2.0"
+
+[[package]]
+name = "packaging"
+version = "21.3"
+description = "Core utilities for Python packages"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
+
+[[package]]
+name = "pandas"
+version = "1.4.1"
+description = "Powerful data structures for data analysis, time series, and statistics"
+category = "main"
+optional = false
+python-versions = ">=3.8"
+
+[package.dependencies]
+numpy = [
+ {version = ">=1.18.5", markers = "platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
+ {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""},
+ {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""},
+ {version = ">=1.21.0", markers = "python_version >= \"3.10\""},
+]
+python-dateutil = ">=2.8.1"
+pytz = ">=2020.1"
+
+[package.extras]
+test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"]
+
+[[package]]
+name = "pluggy"
+version = "1.0.0"
+description = "plugin and hook calling mechanisms for python"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.extras]
+dev = ["pre-commit", "tox"]
+testing = ["pytest", "pytest-benchmark"]
+
+[[package]]
+name = "psutil"
+version = "5.9.4"
+description = "Cross-platform lib for process and system monitoring in Python."
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[package.extras]
+test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
+
+[[package]]
+name = "pycparser"
+version = "2.21"
+description = "C parser in Python"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[[package]]
+name = "pyopenssl"
+version = "22.0.0"
+description = "Python wrapper module around the OpenSSL library"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+cryptography = ">=35.0"
+
+[package.extras]
+docs = ["sphinx", "sphinx-rtd-theme"]
+test = ["flaky", "pretend", "pytest (>=3.0.1)"]
+
+[[package]]
+name = "pyparsing"
+version = "3.0.9"
+description = "pyparsing module - Classes and methods to define and execute parsing grammars"
+category = "dev"
+optional = false
+python-versions = ">=3.6.8"
+
+[package.extras]
+diagrams = ["jinja2", "railroad-diagrams"]
+
+[[package]]
+name = "pysocks"
+version = "1.7.1"
+description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information."
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[[package]]
+name = "pytest"
+version = "7.2.0"
+description = "pytest: simple powerful testing with Python"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+attrs = ">=19.2.0"
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
+iniconfig = "*"
+packaging = "*"
+pluggy = ">=0.12,<2.0"
+tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
+
+[package.extras]
+testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
+
+[[package]]
+name = "pytest-mock"
+version = "3.10.0"
+description = "Thin-wrapper around the mock package for easier use with pytest"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+pytest = ">=5.0"
+
+[package.extras]
+dev = ["pre-commit", "pytest-asyncio", "tox"]
+
+[[package]]
+name = "python-dateutil"
+version = "2.8.2"
+description = "Extensions to the standard Python datetime module"
+category = "main"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+
+[package.dependencies]
+six = ">=1.5"
+
+[[package]]
+name = "pytz"
+version = "2021.3"
+description = "World timezone definitions, modern and historical"
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "requests"
+version = "2.27.1"
+description = "Python HTTP for Humans."
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+
+[package.dependencies]
+certifi = ">=2017.4.17"
+charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""}
+idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""}
+urllib3 = ">=1.21.1,<1.27"
+
+[package.extras]
+socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
+use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
+
+[[package]]
+name = "selenium"
+version = "4.1.3"
+description = ""
+category = "main"
+optional = false
+python-versions = "~=3.7"
+
+[package.dependencies]
+trio = ">=0.17,<1.0"
+trio-websocket = ">=0.9,<1.0"
+urllib3 = {version = ">=1.26,<2.0", extras = ["secure", "socks"]}
+
+[[package]]
+name = "six"
+version = "1.16.0"
+description = "Python 2 and 3 compatibility utilities"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
+
+[[package]]
+name = "sniffio"
+version = "1.2.0"
+description = "Sniff out which async library your code is running under"
+category = "main"
+optional = false
+python-versions = ">=3.5"
+
+[[package]]
+name = "sortedcontainers"
+version = "2.4.0"
+description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set"
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "soupsieve"
+version = "2.3.1"
+description = "A modern CSS selector implementation for Beautiful Soup."
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[[package]]
+name = "sqlitedict"
+version = "2.0.0"
+description = "Persistent dict in Python, backed up by sqlite3 and pickle, multithread-safe."
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "tomli"
+version = "2.0.1"
+description = "A lil' TOML parser"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[[package]]
+name = "tqdm"
+version = "4.63.0"
+description = "Fast, Extensible Progress Meter"
+category = "main"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7"
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[package.extras]
+dev = ["py-make (>=0.1.0)", "twine", "wheel"]
+notebook = ["ipywidgets (>=6)"]
+telegram = ["requests"]
+
+[[package]]
+name = "trio"
+version = "0.20.0"
+description = "A friendly Python library for async concurrency and I/O"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+async-generator = ">=1.9"
+attrs = ">=19.2.0"
+cffi = {version = ">=1.14", markers = "os_name == \"nt\" and implementation_name != \"pypy\""}
+idna = "*"
+outcome = "*"
+sniffio = "*"
+sortedcontainers = "*"
+
+[[package]]
+name = "trio-websocket"
+version = "0.9.2"
+description = "WebSocket library for Trio"
+category = "main"
+optional = false
+python-versions = ">=3.5"
+
+[package.dependencies]
+async-generator = ">=1.10"
+trio = ">=0.11"
+wsproto = ">=0.14"
+
+[[package]]
+name = "urllib3"
+version = "1.26.9"
+description = "HTTP library with thread-safe connection pooling, file post, and more."
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
+
+[package.dependencies]
+certifi = {version = "*", optional = true, markers = "extra == \"secure\""}
+cryptography = {version = ">=1.3.4", optional = true, markers = "extra == \"secure\""}
+idna = {version = ">=2.0.0", optional = true, markers = "extra == \"secure\""}
+pyOpenSSL = {version = ">=0.14", optional = true, markers = "extra == \"secure\""}
+PySocks = {version = ">=1.5.6,<1.5.7 || >1.5.7,<2.0", optional = true, markers = "extra == \"socks\""}
+
+[package.extras]
+brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
+secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)"]
+socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
+
+[[package]]
+name = "webdriver-manager"
+version = "3.5.4"
+description = "Library provides the way to automatically manage drivers for different browsers"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+requests = "*"
+
+[[package]]
+name = "win32-setctime"
+version = "1.1.0"
+description = "A small Python utility to set file creation time on Windows"
+category = "main"
+optional = false
+python-versions = ">=3.5"
+
+[package.extras]
+dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
+
+[[package]]
+name = "wsproto"
+version = "1.1.0"
+description = "WebSockets state-machine based protocol implementation"
+category = "main"
+optional = false
+python-versions = ">=3.7.0"
+
+[package.dependencies]
+h11 = ">=0.9.0,<1"
+
+[metadata]
+lock-version = "1.1"
+python-versions = "~=3.8"
+content-hash = "963f00872e5cf8e48cf9a053276d77ea593d40b80b6c670c1c2e7e5d37309c33"
+
+[metadata.files]
+async-generator = [
+ {file = "async_generator-1.10-py3-none-any.whl", hash = "sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b"},
+ {file = "async_generator-1.10.tar.gz", hash = "sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"},
+]
+attrs = [
+ {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"},
+ {file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"},
+]
+beautifulsoup4 = [
+ {file = "beautifulsoup4-4.10.0-py3-none-any.whl", hash = "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf"},
+ {file = "beautifulsoup4-4.10.0.tar.gz", hash = "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891"},
+]
+certifi = [
+ {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"},
+ {file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"},
+]
+cffi = [
+ {file = "cffi-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962"},
+ {file = "cffi-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0"},
+ {file = "cffi-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14"},
+ {file = "cffi-1.15.0-cp27-cp27m-win32.whl", hash = "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474"},
+ {file = "cffi-1.15.0-cp27-cp27m-win_amd64.whl", hash = "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6"},
+ {file = "cffi-1.15.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27"},
+ {file = "cffi-1.15.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023"},
+ {file = "cffi-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2"},
+ {file = "cffi-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e"},
+ {file = "cffi-1.15.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7"},
+ {file = "cffi-1.15.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3"},
+ {file = "cffi-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c"},
+ {file = "cffi-1.15.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962"},
+ {file = "cffi-1.15.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382"},
+ {file = "cffi-1.15.0-cp310-cp310-win32.whl", hash = "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55"},
+ {file = "cffi-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0"},
+ {file = "cffi-1.15.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e"},
+ {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39"},
+ {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc"},
+ {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032"},
+ {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8"},
+ {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605"},
+ {file = "cffi-1.15.0-cp36-cp36m-win32.whl", hash = "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e"},
+ {file = "cffi-1.15.0-cp36-cp36m-win_amd64.whl", hash = "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc"},
+ {file = "cffi-1.15.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636"},
+ {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4"},
+ {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997"},
+ {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b"},
+ {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2"},
+ {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7"},
+ {file = "cffi-1.15.0-cp37-cp37m-win32.whl", hash = "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66"},
+ {file = "cffi-1.15.0-cp37-cp37m-win_amd64.whl", hash = "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029"},
+ {file = "cffi-1.15.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880"},
+ {file = "cffi-1.15.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20"},
+ {file = "cffi-1.15.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024"},
+ {file = "cffi-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e"},
+ {file = "cffi-1.15.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728"},
+ {file = "cffi-1.15.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6"},
+ {file = "cffi-1.15.0-cp38-cp38-win32.whl", hash = "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c"},
+ {file = "cffi-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443"},
+ {file = "cffi-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a"},
+ {file = "cffi-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37"},
+ {file = "cffi-1.15.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a"},
+ {file = "cffi-1.15.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e"},
+ {file = "cffi-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796"},
+ {file = "cffi-1.15.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df"},
+ {file = "cffi-1.15.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8"},
+ {file = "cffi-1.15.0-cp39-cp39-win32.whl", hash = "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a"},
+ {file = "cffi-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139"},
+ {file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"},
+]
+charset-normalizer = [
+ {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"},
+ {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
+]
+click = [
+ {file = "click-8.0.4-py3-none-any.whl", hash = "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1"},
+ {file = "click-8.0.4.tar.gz", hash = "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"},
+]
+colorama = [
+ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
+ {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
+]
+cryptography = [
+ {file = "cryptography-36.0.2-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:4e2dddd38a5ba733be6a025a1475a9f45e4e41139d1321f412c6b360b19070b6"},
+ {file = "cryptography-36.0.2-cp36-abi3-macosx_10_10_x86_64.whl", hash = "sha256:4881d09298cd0b669bb15b9cfe6166f16fc1277b4ed0d04a22f3d6430cb30f1d"},
+ {file = "cryptography-36.0.2-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea634401ca02367c1567f012317502ef3437522e2fc44a3ea1844de028fa4b84"},
+ {file = "cryptography-36.0.2-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:7be666cc4599b415f320839e36367b273db8501127b38316f3b9f22f17a0b815"},
+ {file = "cryptography-36.0.2-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8241cac0aae90b82d6b5c443b853723bcc66963970c67e56e71a2609dc4b5eaf"},
+ {file = "cryptography-36.0.2-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b2d54e787a884ffc6e187262823b6feb06c338084bbe80d45166a1cb1c6c5bf"},
+ {file = "cryptography-36.0.2-cp36-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:c2c5250ff0d36fd58550252f54915776940e4e866f38f3a7866d92b32a654b86"},
+ {file = "cryptography-36.0.2-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:ec6597aa85ce03f3e507566b8bcdf9da2227ec86c4266bd5e6ab4d9e0cc8dab2"},
+ {file = "cryptography-36.0.2-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ca9f686517ec2c4a4ce930207f75c00bf03d94e5063cbc00a1dc42531511b7eb"},
+ {file = "cryptography-36.0.2-cp36-abi3-win32.whl", hash = "sha256:f64b232348ee82f13aac22856515ce0195837f6968aeaa94a3d0353ea2ec06a6"},
+ {file = "cryptography-36.0.2-cp36-abi3-win_amd64.whl", hash = "sha256:53e0285b49fd0ab6e604f4c5d9c5ddd98de77018542e88366923f152dbeb3c29"},
+ {file = "cryptography-36.0.2-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:32db5cc49c73f39aac27574522cecd0a4bb7384e71198bc65a0d23f901e89bb7"},
+ {file = "cryptography-36.0.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b3d199647468d410994dbeb8cec5816fb74feb9368aedf300af709ef507e3e"},
+ {file = "cryptography-36.0.2-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:da73d095f8590ad437cd5e9faf6628a218aa7c387e1fdf67b888b47ba56a17f0"},
+ {file = "cryptography-36.0.2-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:0a3bf09bb0b7a2c93ce7b98cb107e9170a90c51a0162a20af1c61c765b90e60b"},
+ {file = "cryptography-36.0.2-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8897b7b7ec077c819187a123174b645eb680c13df68354ed99f9b40a50898f77"},
+ {file = "cryptography-36.0.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82740818f2f240a5da8dfb8943b360e4f24022b093207160c77cadade47d7c85"},
+ {file = "cryptography-36.0.2-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:1f64a62b3b75e4005df19d3b5235abd43fa6358d5516cfc43d87aeba8d08dd51"},
+ {file = "cryptography-36.0.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e167b6b710c7f7bc54e67ef593f8731e1f45aa35f8a8a7b72d6e42ec76afd4b3"},
+ {file = "cryptography-36.0.2.tar.gz", hash = "sha256:70f8f4f7bb2ac9f340655cbac89d68c527af5bb4387522a8413e841e3e6628c9"},
+]
+debugpy = [
+ {file = "debugpy-1.6.3-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:c4b2bd5c245eeb49824bf7e539f95fb17f9a756186e51c3e513e32999d8846f3"},
+ {file = "debugpy-1.6.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b8deaeb779699350deeed835322730a3efec170b88927debc9ba07a1a38e2585"},
+ {file = "debugpy-1.6.3-cp310-cp310-win32.whl", hash = "sha256:fc233a0160f3b117b20216f1169e7211b83235e3cd6749bcdd8dbb72177030c7"},
+ {file = "debugpy-1.6.3-cp310-cp310-win_amd64.whl", hash = "sha256:dda8652520eae3945833e061cbe2993ad94a0b545aebd62e4e6b80ee616c76b2"},
+ {file = "debugpy-1.6.3-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:d5c814596a170a0a58fa6fad74947e30bfd7e192a5d2d7bd6a12156c2899e13a"},
+ {file = "debugpy-1.6.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c4cd6f37e3c168080d61d698390dfe2cd9e74ebf80b448069822a15dadcda57d"},
+ {file = "debugpy-1.6.3-cp37-cp37m-win32.whl", hash = "sha256:3c9f985944a30cfc9ae4306ac6a27b9c31dba72ca943214dad4a0ab3840f6161"},
+ {file = "debugpy-1.6.3-cp37-cp37m-win_amd64.whl", hash = "sha256:5ad571a36cec137ae6ed951d0ff75b5e092e9af6683da084753231150cbc5b25"},
+ {file = "debugpy-1.6.3-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:adcfea5ea06d55d505375995e150c06445e2b20cd12885bcae566148c076636b"},
+ {file = "debugpy-1.6.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:daadab4403427abd090eccb38d8901afd8b393e01fd243048fab3f1d7132abb4"},
+ {file = "debugpy-1.6.3-cp38-cp38-win32.whl", hash = "sha256:6efc30325b68e451118b795eff6fe8488253ca3958251d5158106d9c87581bc6"},
+ {file = "debugpy-1.6.3-cp38-cp38-win_amd64.whl", hash = "sha256:86d784b72c5411c833af1cd45b83d80c252b77c3bfdb43db17c441d772f4c734"},
+ {file = "debugpy-1.6.3-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:4e255982552b0edfe3a6264438dbd62d404baa6556a81a88f9420d3ed79b06ae"},
+ {file = "debugpy-1.6.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cca23cb6161ac89698d629d892520327dd1be9321c0960e610bbcb807232b45d"},
+ {file = "debugpy-1.6.3-cp39-cp39-win32.whl", hash = "sha256:7c302095a81be0d5c19f6529b600bac971440db3e226dce85347cc27e6a61908"},
+ {file = "debugpy-1.6.3-cp39-cp39-win_amd64.whl", hash = "sha256:34d2cdd3a7c87302ba5322b86e79c32c2115be396f3f09ca13306d8a04fe0f16"},
+ {file = "debugpy-1.6.3-py2.py3-none-any.whl", hash = "sha256:84c39940a0cac410bf6aa4db00ba174f973eef521fbe9dd058e26bcabad89c4f"},
+ {file = "debugpy-1.6.3.zip", hash = "sha256:e8922090514a890eec99cfb991bab872dd2e353ebb793164d5f01c362b9a40bf"},
+]
+exceptiongroup = [
+ {file = "exceptiongroup-1.0.4-py3-none-any.whl", hash = "sha256:542adf9dea4055530d6e1279602fa5cb11dab2395fa650b8674eaec35fc4a828"},
+ {file = "exceptiongroup-1.0.4.tar.gz", hash = "sha256:bd14967b79cd9bdb54d97323216f8fdf533e278df937aa2a90089e7d6e06e5ec"},
+]
+h11 = [
+ {file = "h11-0.13.0-py3-none-any.whl", hash = "sha256:8ddd78563b633ca55346c8cd41ec0af27d3c79931828beffb46ce70a379e7442"},
+ {file = "h11-0.13.0.tar.gz", hash = "sha256:70813c1135087a248a4d38cc0e1a0181ffab2188141a93eaf567940c3957ff06"},
+]
+idna = [
+ {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
+ {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
+]
+iniconfig = [
+ {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
+ {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
+]
+loguru = [
+ {file = "loguru-0.6.0-py3-none-any.whl", hash = "sha256:4e2414d534a2ab57573365b3e6d0234dfb1d84b68b7f3b948e6fb743860a77c3"},
+ {file = "loguru-0.6.0.tar.gz", hash = "sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c"},
+]
+lxml = [
+ {file = "lxml-4.8.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:e1ab2fac607842ac36864e358c42feb0960ae62c34aa4caaf12ada0a1fb5d99b"},
+ {file = "lxml-4.8.0-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28d1af847786f68bec57961f31221125c29d6f52d9187c01cd34dc14e2b29430"},
+ {file = "lxml-4.8.0-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b92d40121dcbd74831b690a75533da703750f7041b4bf951befc657c37e5695a"},
+ {file = "lxml-4.8.0-cp27-cp27m-win32.whl", hash = "sha256:e01f9531ba5420838c801c21c1b0f45dbc9607cb22ea2cf132844453bec863a5"},
+ {file = "lxml-4.8.0-cp27-cp27m-win_amd64.whl", hash = "sha256:6259b511b0f2527e6d55ad87acc1c07b3cbffc3d5e050d7e7bcfa151b8202df9"},
+ {file = "lxml-4.8.0-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1010042bfcac2b2dc6098260a2ed022968dbdfaf285fc65a3acf8e4eb1ffd1bc"},
+ {file = "lxml-4.8.0-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fa56bb08b3dd8eac3a8c5b7d075c94e74f755fd9d8a04543ae8d37b1612dd170"},
+ {file = "lxml-4.8.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:31ba2cbc64516dcdd6c24418daa7abff989ddf3ba6d3ea6f6ce6f2ed6e754ec9"},
+ {file = "lxml-4.8.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:31499847fc5f73ee17dbe1b8e24c6dafc4e8d5b48803d17d22988976b0171f03"},
+ {file = "lxml-4.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:5f7d7d9afc7b293147e2d506a4596641d60181a35279ef3aa5778d0d9d9123fe"},
+ {file = "lxml-4.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a3c5f1a719aa11866ffc530d54ad965063a8cbbecae6515acbd5f0fae8f48eaa"},
+ {file = "lxml-4.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6268e27873a3d191849204d00d03f65c0e343b3bcb518a6eaae05677c95621d1"},
+ {file = "lxml-4.8.0-cp310-cp310-win32.whl", hash = "sha256:330bff92c26d4aee79c5bc4d9967858bdbe73fdbdbacb5daf623a03a914fe05b"},
+ {file = "lxml-4.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:b2582b238e1658c4061ebe1b4df53c435190d22457642377fd0cb30685cdfb76"},
+ {file = "lxml-4.8.0-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a2bfc7e2a0601b475477c954bf167dee6d0f55cb167e3f3e7cefad906e7759f6"},
+ {file = "lxml-4.8.0-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a1547ff4b8a833511eeaceacbcd17b043214fcdb385148f9c1bc5556ca9623e2"},
+ {file = "lxml-4.8.0-cp35-cp35m-win32.whl", hash = "sha256:a9f1c3489736ff8e1c7652e9dc39f80cff820f23624f23d9eab6e122ac99b150"},
+ {file = "lxml-4.8.0-cp35-cp35m-win_amd64.whl", hash = "sha256:530f278849031b0eb12f46cca0e5db01cfe5177ab13bd6878c6e739319bae654"},
+ {file = "lxml-4.8.0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:078306d19a33920004addeb5f4630781aaeabb6a8d01398045fcde085091a169"},
+ {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:86545e351e879d0b72b620db6a3b96346921fa87b3d366d6c074e5a9a0b8dadb"},
+ {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24f5c5ae618395ed871b3d8ebfcbb36e3f1091fd847bf54c4de623f9107942f3"},
+ {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:bbab6faf6568484707acc052f4dfc3802bdb0cafe079383fbaa23f1cdae9ecd4"},
+ {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7993232bd4044392c47779a3c7e8889fea6883be46281d45a81451acfd704d7e"},
+ {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6d6483b1229470e1d8835e52e0ff3c6973b9b97b24cd1c116dca90b57a2cc613"},
+ {file = "lxml-4.8.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:ad4332a532e2d5acb231a2e5d33f943750091ee435daffca3fec0a53224e7e33"},
+ {file = "lxml-4.8.0-cp36-cp36m-win32.whl", hash = "sha256:db3535733f59e5605a88a706824dfcb9bd06725e709ecb017e165fc1d6e7d429"},
+ {file = "lxml-4.8.0-cp36-cp36m-win_amd64.whl", hash = "sha256:5f148b0c6133fb928503cfcdfdba395010f997aa44bcf6474fcdd0c5398d9b63"},
+ {file = "lxml-4.8.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:8a31f24e2a0b6317f33aafbb2f0895c0bce772980ae60c2c640d82caac49628a"},
+ {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:719544565c2937c21a6f76d520e6e52b726d132815adb3447ccffbe9f44203c4"},
+ {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:c0b88ed1ae66777a798dc54f627e32d3b81c8009967c63993c450ee4cbcbec15"},
+ {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fa9b7c450be85bfc6cd39f6df8c5b8cbd76b5d6fc1f69efec80203f9894b885f"},
+ {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e9f84ed9f4d50b74fbc77298ee5c870f67cb7e91dcdc1a6915cb1ff6a317476c"},
+ {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1d650812b52d98679ed6c6b3b55cbb8fe5a5460a0aef29aeb08dc0b44577df85"},
+ {file = "lxml-4.8.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:80bbaddf2baab7e6de4bc47405e34948e694a9efe0861c61cdc23aa774fcb141"},
+ {file = "lxml-4.8.0-cp37-cp37m-win32.whl", hash = "sha256:6f7b82934c08e28a2d537d870293236b1000d94d0b4583825ab9649aef7ddf63"},
+ {file = "lxml-4.8.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e1fd7d2fe11f1cb63d3336d147c852f6d07de0d0020d704c6031b46a30b02ca8"},
+ {file = "lxml-4.8.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:5045ee1ccd45a89c4daec1160217d363fcd23811e26734688007c26f28c9e9e7"},
+ {file = "lxml-4.8.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0c1978ff1fd81ed9dcbba4f91cf09faf1f8082c9d72eb122e92294716c605428"},
+ {file = "lxml-4.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cbf2ff155b19dc4d4100f7442f6a697938bf4493f8d3b0c51d45568d5666b5"},
+ {file = "lxml-4.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ce13d6291a5f47c1c8dbd375baa78551053bc6b5e5c0e9bb8e39c0a8359fd52f"},
+ {file = "lxml-4.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e11527dc23d5ef44d76fef11213215c34f36af1608074561fcc561d983aeb870"},
+ {file = "lxml-4.8.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:60d2f60bd5a2a979df28ab309352cdcf8181bda0cca4529769a945f09aba06f9"},
+ {file = "lxml-4.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:62f93eac69ec0f4be98d1b96f4d6b964855b8255c345c17ff12c20b93f247b68"},
+ {file = "lxml-4.8.0-cp38-cp38-win32.whl", hash = "sha256:20b8a746a026017acf07da39fdb10aa80ad9877046c9182442bf80c84a1c4696"},
+ {file = "lxml-4.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:891dc8f522d7059ff0024cd3ae79fd224752676447f9c678f2a5c14b84d9a939"},
+ {file = "lxml-4.8.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b6fc2e2fb6f532cf48b5fed57567ef286addcef38c28874458a41b7837a57807"},
+ {file = "lxml-4.8.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:74eb65ec61e3c7c019d7169387d1b6ffcfea1b9ec5894d116a9a903636e4a0b1"},
+ {file = "lxml-4.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:627e79894770783c129cc5e89b947e52aa26e8e0557c7e205368a809da4b7939"},
+ {file = "lxml-4.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:545bd39c9481f2e3f2727c78c169425efbfb3fbba6e7db4f46a80ebb249819ca"},
+ {file = "lxml-4.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5a58d0b12f5053e270510bf12f753a76aaf3d74c453c00942ed7d2c804ca845c"},
+ {file = "lxml-4.8.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:ec4b4e75fc68da9dc0ed73dcdb431c25c57775383fec325d23a770a64e7ebc87"},
+ {file = "lxml-4.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5804e04feb4e61babf3911c2a974a5b86f66ee227cc5006230b00ac6d285b3a9"},
+ {file = "lxml-4.8.0-cp39-cp39-win32.whl", hash = "sha256:aa0cf4922da7a3c905d000b35065df6184c0dc1d866dd3b86fd961905bbad2ea"},
+ {file = "lxml-4.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:dd10383f1d6b7edf247d0960a3db274c07e96cf3a3fc7c41c8448f93eac3fb1c"},
+ {file = "lxml-4.8.0-pp37-pypy37_pp73-macosx_10_14_x86_64.whl", hash = "sha256:2403a6d6fb61c285969b71f4a3527873fe93fd0abe0832d858a17fe68c8fa507"},
+ {file = "lxml-4.8.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:986b7a96228c9b4942ec420eff37556c5777bfba6758edcb95421e4a614b57f9"},
+ {file = "lxml-4.8.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6fe4ef4402df0250b75ba876c3795510d782def5c1e63890bde02d622570d39e"},
+ {file = "lxml-4.8.0-pp38-pypy38_pp73-macosx_10_14_x86_64.whl", hash = "sha256:f10ce66fcdeb3543df51d423ede7e238be98412232fca5daec3e54bcd16b8da0"},
+ {file = "lxml-4.8.0-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:730766072fd5dcb219dd2b95c4c49752a54f00157f322bc6d71f7d2a31fecd79"},
+ {file = "lxml-4.8.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:8b99ec73073b37f9ebe8caf399001848fced9c08064effdbfc4da2b5a8d07b93"},
+ {file = "lxml-4.8.0.tar.gz", hash = "sha256:f63f62fc60e6228a4ca9abae28228f35e1bd3ce675013d1dfb828688d50c6e23"},
+]
+memory-profiler = [
+ {file = "memory_profiler-0.61.0-py3-none-any.whl", hash = "sha256:400348e61031e3942ad4d4109d18753b2fb08c2f6fb8290671c5513a34182d84"},
+ {file = "memory_profiler-0.61.0.tar.gz", hash = "sha256:4e5b73d7864a1d1292fb76a03e82a3e78ef934d06828a698d9dada76da2067b0"},
+]
+numpy = [
+ {file = "numpy-1.22.3-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75"},
+ {file = "numpy-1.22.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab"},
+ {file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e"},
+ {file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4"},
+ {file = "numpy-1.22.3-cp310-cp310-win32.whl", hash = "sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430"},
+ {file = "numpy-1.22.3-cp310-cp310-win_amd64.whl", hash = "sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4"},
+ {file = "numpy-1.22.3-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce"},
+ {file = "numpy-1.22.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe"},
+ {file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5"},
+ {file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1"},
+ {file = "numpy-1.22.3-cp38-cp38-win32.whl", hash = "sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62"},
+ {file = "numpy-1.22.3-cp38-cp38-win_amd64.whl", hash = "sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676"},
+ {file = "numpy-1.22.3-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123"},
+ {file = "numpy-1.22.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802"},
+ {file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d"},
+ {file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168"},
+ {file = "numpy-1.22.3-cp39-cp39-win32.whl", hash = "sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa"},
+ {file = "numpy-1.22.3-cp39-cp39-win_amd64.whl", hash = "sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a"},
+ {file = "numpy-1.22.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f"},
+ {file = "numpy-1.22.3.zip", hash = "sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18"},
+]
+outcome = [
+ {file = "outcome-1.1.0-py2.py3-none-any.whl", hash = "sha256:c7dd9375cfd3c12db9801d080a3b63d4b0a261aa996c4c13152380587288d958"},
+ {file = "outcome-1.1.0.tar.gz", hash = "sha256:e862f01d4e626e63e8f92c38d1f8d5546d3f9cce989263c521b2e7990d186967"},
+]
+packaging = [
+ {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
+ {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
+]
+pandas = [
+ {file = "pandas-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3dfb32ed50122fe8c5e7f2b8d97387edd742cc78f9ec36f007ee126cd3720907"},
+ {file = "pandas-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0259cd11e7e6125aaea3af823b80444f3adad6149ff4c97fef760093598b3e34"},
+ {file = "pandas-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:96e9ece5759f9b47ae43794b6359bbc54805d76e573b161ae770c1ea59393106"},
+ {file = "pandas-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:508c99debccd15790d526ce6b1624b97a5e1e4ca5b871319fb0ebfd46b8f4dad"},
+ {file = "pandas-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6a7bbbb7950063bfc942f8794bc3e31697c020a14f1cd8905fc1d28ec674a01"},
+ {file = "pandas-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:c614001129b2a5add5e3677c3a213a9e6fd376204cb8d17c04e84ff7dfc02a73"},
+ {file = "pandas-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:4e1176f45981c8ccc8161bc036916c004ca51037a7ed73f2d2a9857e6dbe654f"},
+ {file = "pandas-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bbb15ad79050e8b8d39ec40dd96a30cd09b886a2ae8848d0df1abba4d5502a67"},
+ {file = "pandas-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6d6ad1da00c7cc7d8dd1559a6ba59ba3973be6b15722d49738b2be0977eb8a0c"},
+ {file = "pandas-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:358b0bc98a5ff067132d23bf7a2242ee95db9ea5b7bbc401cf79205f11502fd3"},
+ {file = "pandas-1.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6105af6533f8b63a43ea9f08a2ede04e8f43e49daef0209ab0d30352bcf08bee"},
+ {file = "pandas-1.4.1-cp38-cp38-win32.whl", hash = "sha256:04dd15d9db538470900c851498e532ef28d4e56bfe72c9523acb32042de43dfb"},
+ {file = "pandas-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:1b384516dbb4e6aae30e3464c2e77c563da5980440fbdfbd0968e3942f8f9d70"},
+ {file = "pandas-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f02e85e6d832be37d7f16cf6ac8bb26b519ace3e5f3235564a91c7f658ab2a43"},
+ {file = "pandas-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0b1a13f647e4209ed7dbb5da3497891d0045da9785327530ab696417ef478f84"},
+ {file = "pandas-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:19f7c632436b1b4f84615c3b127bbd7bc603db95e3d4332ed259dc815c9aaa26"},
+ {file = "pandas-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ea47ba1d6f359680130bd29af497333be6110de8f4c35b9211eec5a5a9630fa"},
+ {file = "pandas-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e5a7a1e0ecaac652326af627a3eca84886da9e667d68286866d4e33f6547caf"},
+ {file = "pandas-1.4.1-cp39-cp39-win32.whl", hash = "sha256:1d85d5f6be66dfd6d1d8d13b9535e342a2214260f1852654b19fa4d7b8d1218b"},
+ {file = "pandas-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:3129a35d9dad1d80c234dd78f8f03141b914395d23f97cf92a366dcd19f8f8bf"},
+ {file = "pandas-1.4.1.tar.gz", hash = "sha256:8db93ec98ac7cb5f8ac1420c10f5e3c43533153f253fe7fb6d891cf5aa2b80d2"},
+]
+pluggy = [
+ {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
+ {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
+]
+psutil = [
+ {file = "psutil-5.9.4-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c1ca331af862803a42677c120aff8a814a804e09832f166f226bfd22b56feee8"},
+ {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:68908971daf802203f3d37e78d3f8831b6d1014864d7a85937941bb35f09aefe"},
+ {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:3ff89f9b835100a825b14c2808a106b6fdcc4b15483141482a12c725e7f78549"},
+ {file = "psutil-5.9.4-cp27-cp27m-win32.whl", hash = "sha256:852dd5d9f8a47169fe62fd4a971aa07859476c2ba22c2254d4a1baa4e10b95ad"},
+ {file = "psutil-5.9.4-cp27-cp27m-win_amd64.whl", hash = "sha256:9120cd39dca5c5e1c54b59a41d205023d436799b1c8c4d3ff71af18535728e94"},
+ {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6b92c532979bafc2df23ddc785ed116fced1f492ad90a6830cf24f4d1ea27d24"},
+ {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:efeae04f9516907be44904cc7ce08defb6b665128992a56957abc9b61dca94b7"},
+ {file = "psutil-5.9.4-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:54d5b184728298f2ca8567bf83c422b706200bcbbfafdc06718264f9393cfeb7"},
+ {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16653106f3b59386ffe10e0bad3bb6299e169d5327d3f187614b1cb8f24cf2e1"},
+ {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54c0d3d8e0078b7666984e11b12b88af2db11d11249a8ac8920dd5ef68a66e08"},
+ {file = "psutil-5.9.4-cp36-abi3-win32.whl", hash = "sha256:149555f59a69b33f056ba1c4eb22bb7bf24332ce631c44a319cec09f876aaeff"},
+ {file = "psutil-5.9.4-cp36-abi3-win_amd64.whl", hash = "sha256:fd8522436a6ada7b4aad6638662966de0d61d241cb821239b2ae7013d41a43d4"},
+ {file = "psutil-5.9.4-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6001c809253a29599bc0dfd5179d9f8a5779f9dffea1da0f13c53ee568115e1e"},
+ {file = "psutil-5.9.4.tar.gz", hash = "sha256:3d7f9739eb435d4b1338944abe23f49584bde5395f27487d2ee25ad9a8774a62"},
+]
+pycparser = [
+ {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
+ {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
+]
+pyopenssl = [
+ {file = "pyOpenSSL-22.0.0-py2.py3-none-any.whl", hash = "sha256:ea252b38c87425b64116f808355e8da644ef9b07e429398bfece610f893ee2e0"},
+ {file = "pyOpenSSL-22.0.0.tar.gz", hash = "sha256:660b1b1425aac4a1bea1d94168a85d99f0b3144c869dd4390d27629d0087f1bf"},
+]
+pyparsing = [
+ {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
+ {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
+]
+pysocks = [
+ {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"},
+ {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"},
+ {file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"},
+]
+pytest = [
+ {file = "pytest-7.2.0-py3-none-any.whl", hash = "sha256:892f933d339f068883b6fd5a459f03d85bfcb355e4981e146d2c7616c21fef71"},
+ {file = "pytest-7.2.0.tar.gz", hash = "sha256:c4014eb40e10f11f355ad4e3c2fb2c6c6d1919c73f3b5a433de4708202cade59"},
+]
+pytest-mock = [
+ {file = "pytest-mock-3.10.0.tar.gz", hash = "sha256:fbbdb085ef7c252a326fd8cdcac0aa3b1333d8811f131bdcc701002e1be7ed4f"},
+ {file = "pytest_mock-3.10.0-py3-none-any.whl", hash = "sha256:f4c973eeae0282963eb293eb173ce91b091a79c1334455acfac9ddee8a1c784b"},
+]
+python-dateutil = [
+ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
+ {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
+]
+pytz = [
+ {file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"},
+ {file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"},
+]
+requests = [
+ {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
+ {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
+]
+selenium = [
+ {file = "selenium-4.1.3-py3-none-any.whl", hash = "sha256:14d28a628c831c105d38305c881c9c7847199bfd728ec84240c5e86fa1c9bd5a"},
+]
+six = [
+ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
+ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
+]
+sniffio = [
+ {file = "sniffio-1.2.0-py3-none-any.whl", hash = "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663"},
+ {file = "sniffio-1.2.0.tar.gz", hash = "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"},
+]
+sortedcontainers = [
+ {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"},
+ {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"},
+]
+soupsieve = [
+ {file = "soupsieve-2.3.1-py3-none-any.whl", hash = "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb"},
+ {file = "soupsieve-2.3.1.tar.gz", hash = "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9"},
+]
+sqlitedict = [
+ {file = "sqlitedict-2.0.0.tar.gz", hash = "sha256:23a370416f4e1e962daa293382f3a8dbc4127e6a0abc06a5d4e58e6902f05d17"},
+]
+tomli = [
+ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
+ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
+]
+tqdm = [
+ {file = "tqdm-4.63.0-py2.py3-none-any.whl", hash = "sha256:e643e071046f17139dea55b880dc9b33822ce21613b4a4f5ea57f202833dbc29"},
+ {file = "tqdm-4.63.0.tar.gz", hash = "sha256:1d9835ede8e394bb8c9dcbffbca02d717217113adc679236873eeaac5bc0b3cd"},
+]
+trio = [
+ {file = "trio-0.20.0-py3-none-any.whl", hash = "sha256:fb2d48e4eab0dfb786a472cd514aaadc71e3445b203bc300bad93daa75d77c1a"},
+ {file = "trio-0.20.0.tar.gz", hash = "sha256:670a52d3115d0e879e1ac838a4eb999af32f858163e3a704fe4839de2a676070"},
+]
+trio-websocket = [
+ {file = "trio-websocket-0.9.2.tar.gz", hash = "sha256:a3d34de8fac26023eee701ed1e7bf4da9a8326b61a62934ec9e53b64970fd8fe"},
+ {file = "trio_websocket-0.9.2-py3-none-any.whl", hash = "sha256:5b558f6e83cc20a37c3b61202476c5295d1addf57bd65543364e0337e37ed2bc"},
+]
+urllib3 = [
+ {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"},
+ {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"},
+]
+webdriver-manager = [
+ {file = "webdriver_manager-3.5.4-py2.py3-none-any.whl", hash = "sha256:b5b91b5df83181e002263fe27296967a5b19cb1ebe8e4a63ee83538394037df4"},
+ {file = "webdriver_manager-3.5.4.tar.gz", hash = "sha256:2eb7c2fe38ec5b06e2090164923e4dfb7c3ac4e7140333a3de9c7956f5047858"},
+]
+win32-setctime = [
+ {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"},
+ {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"},
+]
+wsproto = [
+ {file = "wsproto-1.1.0-py3-none-any.whl", hash = "sha256:2218cb57952d90b9fca325c0dcfb08c3bda93e8fd8070b0a17f048e2e47a521b"},
+ {file = "wsproto-1.1.0.tar.gz", hash = "sha256:a2e56bfd5c7cd83c1369d83b5feccd6d37798b74872866e62616e0ecf111bda8"},
+]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..066b74a
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,33 @@
+[tool.poetry]
+name = "paperscraper"
+version = "0.1.0-alpha.1"
+description = "Scrape and provide interface for data from dblp"
+authors = []
+
+[tool.poetry.dependencies]
+python = "~=3.8"
+lxml = "^4.8.0"
+pandas = "^1.4.1"
+beautifulsoup4 = "^4.10.0"
+selenium = "^4.1.3"
+numpy = "^1.22.3"
+click = "^8.0.4"
+loguru = "^0.6.0"
+tqdm = "^4.63.0"
+webdriver-manager = "^3.5.4"
+sqlitedict = "^2.0.0"
+
+[tool.poetry.dev-dependencies]
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.2.0"
+debugpy = "^1.6.3"
+memory-profiler = "^0.61.0"
+pytest-mock = "^3.10.0"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry.scripts]
+paperscraper = "paperscraper._cli:cli"
\ No newline at end of file
diff --git a/test/assets/data/dblp_processed.xml b/test/assets/data/dblp_processed.xml
new file mode 100644
index 0000000..acf70d7
--- /dev/null
+++ b/test/assets/data/dblp_processed.xml
@@ -0,0 +1,89 @@
+
+
+
+
+ Iván Cantador
+ Ignacio Fernández-Tobías
+ Shlomo Berkovsky
+ Paolo Cremonesi
+ Cross-Domain Recommender Systems.
+ 919-959
+ 2015
+ Recommender Systems Handbook
+ https://doi.org/10.1007/978-1-4899-7637-6_27
+ reference/sp/2015rsh
+ db/reference/sp/rsh2015.html#CantadorFBC15
+
+
+ Jeffrey V. Nickerson
+ Human-Based Evolutionary Computing.
+ 641-648
+ 2013
+ Handbook of Human Computation
+ https://doi.org/10.1007/978-1-4614-8806-4_51
+ https://www.wikidata.org/entity/Q105641856
+ reference/sp/2013hc
+ db/reference/sp/hc2013.html#Nickerson13
+
+
+ Luciana S. Buriol
+ Network Optimization.
+ 1123-1140
+ 2018
+ Handbook of Heuristics
+ https://doi.org/10.1007/978-3-319-07124-4_46
+ reference/sp/2018heuristics
+ db/reference/sp/heuristics2018.html#Buriol18
+
+
+ Alexander Felfernig
+ Gerhard Friedrich
+ Dietmar Jannach
+ Markus Zanker
+ Constraint-Based Recommender Systems.
+ 161-190
+ 2015
+ Recommender Systems Handbook
+ https://doi.org/10.1007/978-1-4899-7637-6_5
+ reference/sp/2015rsh
+ db/reference/sp/rsh2015.html#FelfernigFJZ15
+
+
+ Fernando Sandoya
+ Anna Martínez-Gavara
+ Ricardo Aceves
+ Abraham Duarte
+ Rafael Martí
+ Diversity and Equity Models.
+ 979-998
+ 2018
+ Handbook of Heuristics
+ https://doi.org/10.1007/978-3-319-07124-4_61
+ reference/sp/2018heuristics
+ db/reference/sp/heuristics2018.html#SandoyaMADM18
+
+
+ Liane Gabora
+ Cultural Evolution as Distributed Computation.
+ 447-461
+ 2013
+ Handbook of Human Computation
+ https://doi.org/10.1007/978-1-4614-8806-4_34
+ https://www.wikidata.org/entity/Q105641836
+ reference/sp/2013hc
+ db/reference/sp/hc2013.html#Gabora13
+
+
+ Jameson L. Toole
+ Yves-Alexandre de Montjoye
+ Marta C. González
+ Alex 'Sandy' Pentland
+ Modeling and Understanding Intrinsic Characteristics of Human Mobility.
+ 13-34
+ 2018
+ reference/sp/2018mdp
+ Handbook of Mobile Data Privacy
+ https://doi.org/10.1007/978-3-319-98161-1_2
+ db/reference/sp/mdp2018.html#TooleMGP18
+
+
diff --git a/test/test_cli.py b/test/test_cli.py
new file mode 100644
index 0000000..b7b260d
--- /dev/null
+++ b/test/test_cli.py
@@ -0,0 +1,58 @@
+import pytest
+import importlib
+from click.testing import CliRunner
+import paperscraper
+import pytest_mock
+
+import paperscraper._cli
+
+
+@pytest.fixture(scope="function")
+def runner():
+ return CliRunner()
+
+
+def called_with_config_and_force(mocked_function):
+ mocked_function.assert_called_with(config=paperscraper._cli.config, force=True)
+
+
+def mock_function(mocker, mock_function):
+ mocker.patch(mock_function)
+    # Reload the CLI module so it re-imports the (now mocked) processing functions
+ importlib.reload(paperscraper._cli)
+
+
+def test_process_db(runner, mocker):
+ mock_function(mocker, "paperscraper._preprocess.get_processed_db")
+ result = runner.invoke(paperscraper._cli.cli, ["process", "process-db", "-f"])
+ called_with_config_and_force(paperscraper._preprocess.get_processed_db)
+
+
+def test_extract_data(runner, mocker):
+ mock_function(mocker, "paperscraper._preprocess.get_extracted_data")
+ result = runner.invoke(paperscraper._cli.cli, ["process", "extract-data", "-f"])
+ called_with_config_and_force(paperscraper._preprocess.get_extracted_data)
+
+
+def test_process_data(runner, mocker):
+ mock_function(mocker, "paperscraper._preprocess.get_processed_data")
+ result = runner.invoke(paperscraper._cli.cli, ["process", "process-data", "-f"])
+ called_with_config_and_force(paperscraper._preprocess.get_processed_data)
+
+
+def test_post_process_data(runner, mocker):
+ mock_function(mocker, "paperscraper._postprocess.get_post_processed_data")
+ result = runner.invoke(paperscraper._cli.cli, ["process", "post-process-data", "-f"])
+ called_with_config_and_force(paperscraper._postprocess.get_post_processed_data)
+
+
+def test_run_all(runner, mocker):
+ mock_function(mocker, "paperscraper._preprocess.get_processed_db")
+ mock_function(mocker, "paperscraper._preprocess.get_extracted_data")
+ mock_function(mocker, "paperscraper._preprocess.get_processed_data")
+ mock_function(mocker, "paperscraper._postprocess.get_post_processed_data")
+ result = runner.invoke(paperscraper._cli.cli, ["process", "run-all", "-f"])
+ called_with_config_and_force(paperscraper._preprocess.get_processed_db)
+ called_with_config_and_force(paperscraper._preprocess.get_extracted_data)
+ called_with_config_and_force(paperscraper._preprocess.get_processed_data)
+ called_with_config_and_force(paperscraper._postprocess.get_post_processed_data)
diff --git a/test/test_preprocess.py b/test/test_preprocess.py
new file mode 100644
index 0000000..c3d2530
--- /dev/null
+++ b/test/test_preprocess.py
@@ -0,0 +1,46 @@
+import pytest
+import shutil
+from pathlib import Path
+from paperscraper._preprocess import get_extracted_data
+from paperscraper.config import Config
+
+
+@pytest.fixture(scope="class")
+def test_config(request, tmp_path_factory):
+ root_dir = Path(__file__).parent
+ output_dir = tmp_path_factory.mktemp("output")
+ _config = Config(root_dir=root_dir, output_dir=output_dir)
+ _config.interesting_venues = {
+ "Handbook of Human Computation": {
+ "sourcetype": "booktitle",
+ "publishers": []
+ },
+ "Recommender Systems Handbook": {
+ "sourcetype": "booktitle",
+ "publishers": []
+ },
+ "Handbook of Heuristics": {
+ "sourcetype": "booktitle",
+ "publishers": []
+ }
+ }
+ yield _config
+ shutil.rmtree(str(output_dir))
+
+
+class Test_get_extracted_data:
+ def _get_extracted_data_results(self, data, venues):
+ _len_data = len(data)
+ data.close(force=True)
+ _len_venues = len(venues)
+ venues.close(force=True)
+ assert _len_data == 6
+ assert _len_venues == 4
+
+ def test_get_extracted_data_first(self, test_config):
+ data, venues = get_extracted_data(test_config, force=True)
+ self._get_extracted_data_results(data, venues)
+
+ def test_get_extracted_data_second(self, test_config):
+ data, venues = get_extracted_data(test_config, force=False)
+ self._get_extracted_data_results(data, venues)