From 922fa7d33bf28674ee1ac3800b5504add3d3415f Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Fri, 12 Jul 2019 14:57:42 +0200 Subject: [PATCH 01/43] update latest code from HF --- cdqa/reader/run_squad.py | 1357 ++++++++------------------- cdqa/reader/utils_squad.py | 743 +++++++++++++++ cdqa/reader/utils_squad_evaluate.py | 289 ++++++ 3 files changed, 1399 insertions(+), 990 deletions(-) create mode 100644 cdqa/reader/utils_squad.py create mode 100644 cdqa/reader/utils_squad_evaluate.py diff --git a/cdqa/reader/run_squad.py b/cdqa/reader/run_squad.py index a3525b1e..af4a771f 100644 --- a/cdqa/reader/run_squad.py +++ b/cdqa/reader/run_squad.py @@ -13,18 +13,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Run BERT on SQuAD.""" +""" Finetuning a question-answering model (Bert, XLM, XLNet,...) on SQuAD.""" from __future__ import absolute_import, division, print_function import argparse -import collections -import json import logging -import math import os import random -import sys from io import open import numpy as np @@ -34,747 +30,269 @@ from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm, trange -from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME -from pytorch_pretrained_bert.modeling import BertForQuestionAnswering, BertConfig -from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule -from pytorch_pretrained_bert.tokenization import (BasicTokenizer, - BertTokenizer, - whitespace_tokenize) +from tensorboardX import SummaryWriter -if sys.version_info[0] == 2: - import cPickle as pickle -else: - import pickle +from pytorch_transformers import (WEIGHTS_NAME, BertConfig, + BertForQuestionAnswering, BertTokenizer, + XLMConfig, XLMForQuestionAnswering, + XLMTokenizer, XLNetConfig, + XLNetForQuestionAnswering, + XLNetTokenizer) + +from pytorch_transformers import AdamW, WarmupLinearSchedule + +from utils_squad import read_squad_examples, convert_examples_to_features, RawResult, write_predictions + +from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad logger = logging.getLogger(__name__) +ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) \ + for conf in (BertConfig, XLNetConfig, XLMConfig)), ()) + +MODEL_CLASSES = { + 'bert': (BertConfig, BertForQuestionAnswering, BertTokenizer), + 'xlnet': (XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer), + 'xlm': (XLMConfig, XLMForQuestionAnswering, XLMTokenizer), +} + +def set_seed(args): + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + if args.n_gpu > 0: + torch.cuda.manual_seed_all(args.seed) + + +def train(args, train_dataset, model, tokenizer): + """ Train the model """ + if args.local_rank in [-1, 0]: + tb_writer = SummaryWriter() + + args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu) + train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset) + train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size) -class SquadExample(object): - """ - A single training/test example for the Squad dataset. - For examples without an answer, the start and end position are -1. 
- """ - - def __init__(self, - qas_id, - question_text, - doc_tokens, - orig_answer_text=None, - start_position=None, - end_position=None, - is_impossible=None): - self.qas_id = qas_id - self.question_text = question_text - self.doc_tokens = doc_tokens - self.orig_answer_text = orig_answer_text - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - def __str__(self): - return self.__repr__() - - def __repr__(self): - s = "" - s += "qas_id: %s" % (self.qas_id) - s += ", question_text: %s" % ( - self.question_text) - s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) - if self.start_position: - s += ", start_position: %d" % (self.start_position) - if self.end_position: - s += ", end_position: %d" % (self.end_position) - if self.is_impossible: - s += ", is_impossible: %r" % (self.is_impossible) - return s - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, - unique_id, - example_index, - doc_span_index, - tokens, - token_to_orig_map, - token_is_max_context, - input_ids, - input_mask, - segment_ids, - start_position=None, - end_position=None, - is_impossible=None): - self.unique_id = unique_id - self.example_index = example_index - self.doc_span_index = doc_span_index - self.tokens = tokens - self.token_to_orig_map = token_to_orig_map - self.token_is_max_context = token_is_max_context - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - -def read_squad_examples(input_file, is_training, version_2_with_negative): - """Read a SQuAD json file into a list of SquadExample.""" - with open(input_file, "r", encoding='utf-8') as reader: - input_data = json.load(reader)["data"] - - def is_whitespace(c): - if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: - return True - return False - - examples = [] - for entry in input_data: - for paragraph in entry["paragraphs"]: - paragraph_text = paragraph["context"] - doc_tokens = [] - char_to_word_offset = [] - prev_is_whitespace = True - for c in paragraph_text: - if is_whitespace(c): - prev_is_whitespace = True - else: - if prev_is_whitespace: - doc_tokens.append(c) - else: - doc_tokens[-1] += c - prev_is_whitespace = False - char_to_word_offset.append(len(doc_tokens) - 1) - - for qa in paragraph["qas"]: - qas_id = qa["id"] - question_text = qa["question"] - start_position = None - end_position = None - orig_answer_text = None - is_impossible = False - if is_training: - if version_2_with_negative: - is_impossible = qa["is_impossible"] - if (len(qa["answers"]) != 1) and (not is_impossible): - raise ValueError( - "For training, each question should have exactly 1 answer.") - if not is_impossible: - answer = qa["answers"][0] - orig_answer_text = answer["text"] - answer_offset = answer["answer_start"] - answer_length = len(orig_answer_text) - start_position = char_to_word_offset[answer_offset] - end_position = char_to_word_offset[answer_offset + answer_length - 1] - # Only add answers where the text can be exactly recovered from the - # document. If this CAN'T happen it's likely due to weird Unicode - # stuff so we will just skip the example. - # - # Note that this means for training mode, every example is NOT - # guaranteed to be preserved. 
- actual_text = " ".join(doc_tokens[start_position:(end_position + 1)]) - cleaned_answer_text = " ".join( - whitespace_tokenize(orig_answer_text)) - if actual_text.find(cleaned_answer_text) == -1: - logger.warning("Could not find answer: '%s' vs. '%s'", - actual_text, cleaned_answer_text) - continue - else: - start_position = -1 - end_position = -1 - orig_answer_text = "" - - example = SquadExample( - qas_id=qas_id, - question_text=question_text, - doc_tokens=doc_tokens, - orig_answer_text=orig_answer_text, - start_position=start_position, - end_position=end_position, - is_impossible=is_impossible) - examples.append(example) - return examples - - -def convert_examples_to_features(examples, tokenizer, max_seq_length, - doc_stride, max_query_length, is_training): - """Loads a data file into a list of `InputBatch`s.""" - - unique_id = 1000000000 - - features = [] - for (example_index, example) in enumerate(examples): - query_tokens = tokenizer.tokenize(example.question_text) - - if len(query_tokens) > max_query_length: - query_tokens = query_tokens[0:max_query_length] - - tok_to_orig_index = [] - orig_to_tok_index = [] - all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): - orig_to_tok_index.append(len(all_doc_tokens)) - sub_tokens = tokenizer.tokenize(token) - for sub_token in sub_tokens: - tok_to_orig_index.append(i) - all_doc_tokens.append(sub_token) - - tok_start_position = None - tok_end_position = None - if is_training and example.is_impossible: - tok_start_position = -1 - tok_end_position = -1 - if is_training and not example.is_impossible: - tok_start_position = orig_to_tok_index[example.start_position] - if example.end_position < len(example.doc_tokens) - 1: - tok_end_position = orig_to_tok_index[example.end_position + 1] - 1 + if args.max_steps > 0: + t_total = args.max_steps + args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1 + else: + t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs + + # Prepare optimizer and schedule (linear warmup and decay) + no_decay = ['bias', 'LayerNorm.weight'] + optimizer_grouped_parameters = [ + {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay}, + {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} + ] + optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon) + scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total) + if args.fp16: + try: + from apex import amp + except ImportError: + raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.") + model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level) + + # Train! + logger.info("***** Running training *****") + logger.info(" Num examples = %d", len(train_dataset)) + logger.info(" Num Epochs = %d", args.num_train_epochs) + logger.info(" Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size) + logger.info(" Total train batch size (w. 
parallel, distributed & accumulation) = %d", + args.train_batch_size * args.gradient_accumulation_steps * (torch.distributed.get_world_size() if args.local_rank != -1 else 1)) + logger.info(" Gradient Accumulation steps = %d", args.gradient_accumulation_steps) + logger.info(" Total optimization steps = %d", t_total) + + global_step = 0 + tr_loss, logging_loss = 0.0, 0.0 + model.zero_grad() + train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]) + set_seed(args) # Added here for reproductibility (even between python 2 and 3) + for _ in train_iterator: + epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0]) + for step, batch in enumerate(epoch_iterator): + model.train() + batch = tuple(t.to(args.device) for t in batch) + inputs = {'input_ids': batch[0], + 'token_type_ids': batch[1] if args.model_type in ['bert', 'xlnet'] else None, # XLM don't use segment_ids + 'attention_mask': batch[2], + 'start_positions': batch[3], + 'end_positions': batch[4]} + ouputs = model(**inputs) + loss = ouputs[0] # model outputs are always tuple in pytorch-transformers (see doc) + + if args.n_gpu > 1: + loss = loss.mean() # mean() to average on multi-gpu parallel training + if args.gradient_accumulation_steps > 1: + loss = loss / args.gradient_accumulation_steps + + if args.fp16: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm) else: - tok_end_position = len(all_doc_tokens) - 1 - (tok_start_position, tok_end_position) = _improve_answer_span( - all_doc_tokens, tok_start_position, tok_end_position, tokenizer, - example.orig_answer_text) - - # The -3 accounts for [CLS], [SEP] and [SEP] - max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 - - # We can have documents that are longer than the maximum sequence length. - # To deal with this we do a sliding window approach, where we take chunks - # of the up to our max length with a stride of `doc_stride`. 
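-        # A minimal standalone sketch of the span enumeration below, with
-        # hypothetical numbers: doc_spans(7, 4, 2) -> [(0, 4), (2, 4), (4, 3)],
-        # so every token lands in at least one span and interior tokens appear
-        # in two overlapping spans:
-        #
-        #     def doc_spans(n_tokens, max_len, stride):
-        #         spans, start = [], 0
-        #         while start < n_tokens:
-        #             length = min(n_tokens - start, max_len)  # clip last span
-        #             spans.append((start, length))
-        #             if start + length == n_tokens:           # reached the end
-        #                 break
-        #             start += min(length, stride)             # slide the window
-        #         return spans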
- _DocSpan = collections.namedtuple( # pylint: disable=invalid-name - "DocSpan", ["start", "length"]) - doc_spans = [] - start_offset = 0 - while start_offset < len(all_doc_tokens): - length = len(all_doc_tokens) - start_offset - if length > max_tokens_for_doc: - length = max_tokens_for_doc - doc_spans.append(_DocSpan(start=start_offset, length=length)) - if start_offset + length == len(all_doc_tokens): + loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) + + tr_loss += loss.item() + if (step + 1) % args.gradient_accumulation_steps == 0: + scheduler.step() # Update learning rate schedule + optimizer.step() + model.zero_grad() + global_step += 1 + + if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0: + # Log metrics + if args.local_rank == -1 and args.evaluate_during_training: # Only evaluate when single GPU otherwise metrics may not average well + results = evaluate(args, model, tokenizer) + for key, value in results.items(): + tb_writer.add_scalar('eval_{}'.format(key), value, global_step) + tb_writer.add_scalar('lr', scheduler.get_lr()[0], global_step) + tb_writer.add_scalar('loss', (tr_loss - logging_loss)/args.logging_steps, global_step) + logging_loss = tr_loss + + if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0: + # Save model checkpoint + output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step)) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training + model_to_save.save_pretrained(output_dir) + torch.save(args, os.path.join(output_dir, 'training_args.bin')) + logger.info("Saving model checkpoint to %s", output_dir) + + if args.max_steps > 0 and global_step > args.max_steps: + epoch_iterator.close() break - start_offset += min(length, doc_stride) - - for (doc_span_index, doc_span) in enumerate(doc_spans): - tokens = [] - token_to_orig_map = {} - token_is_max_context = {} - segment_ids = [] - tokens.append("[CLS]") - segment_ids.append(0) - for token in query_tokens: - tokens.append(token) - segment_ids.append(0) - tokens.append("[SEP]") - segment_ids.append(0) - - for i in range(doc_span.length): - split_token_index = doc_span.start + i - token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] - - is_max_context = _check_is_max_context(doc_spans, doc_span_index, - split_token_index) - token_is_max_context[len(tokens)] = is_max_context - tokens.append(all_doc_tokens[split_token_index]) - segment_ids.append(1) - tokens.append("[SEP]") - segment_ids.append(1) - - input_ids = tokenizer.convert_tokens_to_ids(tokens) - - # The mask has 1 for real tokens and 0 for padding tokens. Only real - # tokens are attended to. - input_mask = [1] * len(input_ids) - - # Zero-pad up to the sequence length. - while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - start_position = None - end_position = None - if is_training and not example.is_impossible: - # For training, if our document chunk does not contain an annotation - # we throw it out, since there is nothing to predict. 
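-                # Example: if the gold answer occupies sub-tokens 150-153 but this
-                # span only covers sub-tokens 0-127, the answer is out of range for
-                # the chunk; start/end are then pointed at position 0 (the [CLS]
-                # token), i.e. the chunk is treated as having no extractable answer.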
- doc_start = doc_span.start - doc_end = doc_span.start + doc_span.length - 1 - out_of_span = False - if not (tok_start_position >= doc_start and - tok_end_position <= doc_end): - out_of_span = True - if out_of_span: - start_position = 0 - end_position = 0 - else: - doc_offset = len(query_tokens) + 2 - start_position = tok_start_position - doc_start + doc_offset - end_position = tok_end_position - doc_start + doc_offset - if is_training and example.is_impossible: - start_position = 0 - end_position = 0 - if example_index < 20: - logger.info("*** Example ***") - logger.info("unique_id: %s" % (unique_id)) - logger.info("example_index: %s" % (example_index)) - logger.info("doc_span_index: %s" % (doc_span_index)) - logger.info("tokens: %s" % " ".join(tokens)) - logger.info("token_to_orig_map: %s" % " ".join([ - "%d:%d" % (x, y) for (x, y) in token_to_orig_map.items()])) - logger.info("token_is_max_context: %s" % " ".join([ - "%d:%s" % (x, y) for (x, y) in token_is_max_context.items() - ])) - logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids])) - logger.info( - "input_mask: %s" % " ".join([str(x) for x in input_mask])) - logger.info( - "segment_ids: %s" % " ".join([str(x) for x in segment_ids])) - if is_training and example.is_impossible: - logger.info("impossible example") - if is_training and not example.is_impossible: - answer_text = " ".join(tokens[start_position:(end_position + 1)]) - logger.info("start_position: %d" % (start_position)) - logger.info("end_position: %d" % (end_position)) - logger.info( - "answer: %s" % (answer_text)) - - features.append( - InputFeatures( - unique_id=unique_id, - example_index=example_index, - doc_span_index=doc_span_index, - tokens=tokens, - token_to_orig_map=token_to_orig_map, - token_is_max_context=token_is_max_context, - input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - start_position=start_position, - end_position=end_position, - is_impossible=example.is_impossible)) - unique_id += 1 - - return features - - -def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, - orig_answer_text): - """Returns tokenized answer spans that better match the annotated answer.""" - - # The SQuAD annotations are character based. We first project them to - # whitespace-tokenized words. But then after WordPiece tokenization, we can - # often find a "better match". For example: - # - # Question: What year was John Smith born? - # Context: The leader was John Smith (1895-1943). - # Answer: 1895 - # - # The original whitespace-tokenized answer will be "(1895-1943).". However - # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match - # the exact answer, 1895. - # - # However, this is not always possible. Consider the following: - # - # Question: What country is the top exporter of electornics? - # Context: The Japanese electronics industry is the lagest in the world. - # Answer: Japan - # - # In this case, the annotator chose "Japan" as a character sub-span of - # the word "Japanese". Since our WordPiece tokenizer does not split - # "Japanese", we just use "Japanese" as the annotation. This is fairly rare - # in SQuAD, but does happen. 
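-    # Concretely, for the "(1895-1943)." case above a WordPiece tokenizer
-    # produces something like ["(", "1895", "-", "1943", ")", "."], so the
-    # search below can tighten the whitespace-level span to the single
-    # sub-token "1895" that exactly matches the tokenized annotated answer.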
- tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) - - for new_start in range(input_start, input_end + 1): - for new_end in range(input_end, new_start - 1, -1): - text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) - if text_span == tok_answer_text: - return (new_start, new_end) - - return (input_start, input_end) - - -def _check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - - # Because of the sliding window approach taken to scoring documents, a single - # token can appear in multiple documents. E.g. - # Doc: the man went to the store and bought a gallon of milk - # Span A: the man went to the - # Span B: to the store and bought - # Span C: and bought a gallon of - # ... - # - # Now the word 'bought' will have two scores from spans B and C. We only - # want to consider the score with "maximum context", which we define as - # the *minimum* of its left and right context (the *sum* of left and - # right context will always be the same, of course). - # - # In the example the maximum context for 'bought' would be span C since - # it has 1 left context and 3 right context, while span B has 4 left context - # and 0 right context. - best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index - - -RawResult = collections.namedtuple("RawResult", - ["unique_id", "start_logits", "end_logits"]) - - -def write_predictions(all_examples, all_features, all_results, n_best_size, - max_answer_length, do_lower_case, output_prediction_file, - output_nbest_file, output_null_log_odds_file, verbose_logging, - version_2_with_negative, null_score_diff_threshold): - """Write final predictions to the json file and log-odds of null if needed.""" - logger.info("Writing predictions to: %s" % (output_prediction_file)) - logger.info("Writing nbest to: %s" % (output_nbest_file)) - - example_index_to_features = collections.defaultdict(list) - for feature in all_features: - example_index_to_features[feature.example_index].append(feature) - - unique_id_to_result = {} - for result in all_results: - unique_id_to_result[result.unique_id] = result - - _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name - "PrelimPrediction", - ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]) - - all_predictions = collections.OrderedDict() - all_nbest_json = collections.OrderedDict() - scores_diff_json = collections.OrderedDict() - - for (example_index, example) in enumerate(all_examples): - features = example_index_to_features[example_index] - - prelim_predictions = [] - # keep track of the minimum score of null start+end of position 0 - score_null = 1000000 # large and positive - min_null_feature_index = 0 # the paragraph slice with min null score - null_start_logit = 0 # the start logit at the slice with min null score - null_end_logit = 0 # the end logit at the slice with min null score - for (feature_index, feature) in enumerate(features): - result = unique_id_to_result[feature.unique_id] - start_indexes = _get_best_indexes(result.start_logits, 
n_best_size) - end_indexes = _get_best_indexes(result.end_logits, n_best_size) - # if we could have irrelevant answers, get the min score of irrelevant - if version_2_with_negative: - feature_null_score = result.start_logits[0] + result.end_logits[0] - if feature_null_score < score_null: - score_null = feature_null_score - min_null_feature_index = feature_index - null_start_logit = result.start_logits[0] - null_end_logit = result.end_logits[0] - for start_index in start_indexes: - for end_index in end_indexes: - # We could hypothetically create invalid predictions, e.g., predict - # that the start of the span is in the question. We throw out all - # invalid predictions. - if start_index >= len(feature.tokens): - continue - if end_index >= len(feature.tokens): - continue - if start_index not in feature.token_to_orig_map: - continue - if end_index not in feature.token_to_orig_map: - continue - if not feature.token_is_max_context.get(start_index, False): - continue - if end_index < start_index: - continue - length = end_index - start_index + 1 - if length > max_answer_length: - continue - prelim_predictions.append( - _PrelimPrediction( - feature_index=feature_index, - start_index=start_index, - end_index=end_index, - start_logit=result.start_logits[start_index], - end_logit=result.end_logits[end_index])) - if version_2_with_negative: - prelim_predictions.append( - _PrelimPrediction( - feature_index=min_null_feature_index, - start_index=0, - end_index=0, - start_logit=null_start_logit, - end_logit=null_end_logit)) - prelim_predictions = sorted( - prelim_predictions, - key=lambda x: (x.start_logit + x.end_logit), - reverse=True) - - _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name - "NbestPrediction", ["text", "start_logit", "end_logit"]) - - seen_predictions = {} - nbest = [] - for pred in prelim_predictions: - if len(nbest) >= n_best_size: - break - feature = features[pred.feature_index] - if pred.start_index > 0: # this is a non-null prediction - tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] - orig_doc_start = feature.token_to_orig_map[pred.start_index] - orig_doc_end = feature.token_to_orig_map[pred.end_index] - orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] - tok_text = " ".join(tok_tokens) - - # De-tokenize WordPieces that have been split off. - tok_text = tok_text.replace(" ##", "") - tok_text = tok_text.replace("##", "") - - # Clean whitespace - tok_text = tok_text.strip() - tok_text = " ".join(tok_text.split()) - orig_text = " ".join(orig_tokens) - - final_text = get_final_text(tok_text, orig_text, do_lower_case, verbose_logging) - if final_text in seen_predictions: - continue - - seen_predictions[final_text] = True - else: - final_text = "" - seen_predictions[final_text] = True - - nbest.append( - _NbestPrediction( - text=final_text, - start_logit=pred.start_logit, - end_logit=pred.end_logit)) - # if we didn't include the empty option in the n-best, include it - if version_2_with_negative: - if "" not in seen_predictions: - nbest.append( - _NbestPrediction( - text="", - start_logit=null_start_logit, - end_logit=null_end_logit)) - - # In very rare edge cases we could only have single null prediction. - # So we just create a nonce prediction in this case to avoid failure. - if len(nbest)==1: - nbest.insert(0, - _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) - - # In very rare edge cases we could have no valid predictions. So we - # just create a nonce prediction in this case to avoid failure. 
- if not nbest: - nbest.append( - _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) - - assert len(nbest) >= 1 - - total_scores = [] - best_non_null_entry = None - for entry in nbest: - total_scores.append(entry.start_logit + entry.end_logit) - if not best_non_null_entry: - if entry.text: - best_non_null_entry = entry - - probs = _compute_softmax(total_scores) - - nbest_json = [] - for (i, entry) in enumerate(nbest): - output = collections.OrderedDict() - output["text"] = entry.text - output["probability"] = probs[i] - output["start_logit"] = entry.start_logit - output["end_logit"] = entry.end_logit - nbest_json.append(output) - - assert len(nbest_json) >= 1 - - if not version_2_with_negative: - all_predictions[example.qas_id] = nbest_json[0]["text"] - else: - # predict "" iff the null score - the score of best non-null > threshold - score_diff = score_null - best_non_null_entry.start_logit - ( - best_non_null_entry.end_logit) - scores_diff_json[example.qas_id] = score_diff - if score_diff > null_score_diff_threshold: - all_predictions[example.qas_id] = "" - else: - all_predictions[example.qas_id] = best_non_null_entry.text - all_nbest_json[example.qas_id] = nbest_json - - with open(output_prediction_file, "w") as writer: - writer.write(json.dumps(all_predictions, indent=4) + "\n") - - with open(output_nbest_file, "w") as writer: - writer.write(json.dumps(all_nbest_json, indent=4) + "\n") - - if version_2_with_negative: - with open(output_null_log_odds_file, "w") as writer: - writer.write(json.dumps(scores_diff_json, indent=4) + "\n") - - -def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): - """Project the tokenized prediction back to the original text.""" - - # When we created the data, we kept track of the alignment between original - # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So - # now `orig_text` contains the span of our original text corresponding to the - # span that we predicted. - # - # However, `orig_text` may contain extra characters that we don't want in - # our prediction. - # - # For example, let's say: - # pred_text = steve smith - # orig_text = Steve Smith's - # - # We don't want to return `orig_text` because it contains the extra "'s". - # - # We don't want to return `pred_text` because it's already been normalized - # (the SQuAD eval script also does punctuation stripping/lower casing but - # our tokenizer does additional normalization like stripping accent - # characters). - # - # What we really want to return is "Steve Smith". - # - # Therefore, we have to apply a semi-complicated alignment heuristic between - # `pred_text` and `orig_text` to get a character-to-character alignment. This - # can fail in certain cases in which case we just return `orig_text`. - - def _strip_spaces(text): - ns_chars = [] - ns_to_s_map = collections.OrderedDict() - for (i, c) in enumerate(text): - if c == " ": - continue - ns_to_s_map[len(ns_chars)] = i - ns_chars.append(c) - ns_text = "".join(ns_chars) - return (ns_text, ns_to_s_map) - - # We first tokenize `orig_text`, strip whitespace from the result - # and `pred_text`, and check if they are the same length. If they are - # NOT the same length, the heuristic has failed. If they are the same - # length, we assume the characters are one-to-one aligned. 
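-    # Worked example of _strip_spaces above (hypothetical input): for "a b  c"
-    # it returns ("abc", {0: 0, 1: 2, 2: 5}), mapping each index in the
-    # de-spaced string back to its index in the original string; the
-    # projection below inverts the map built for tok_text to walk from
-    # positions in pred_text to positions in orig_text.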
- tokenizer = BasicTokenizer(do_lower_case=do_lower_case) - - tok_text = " ".join(tokenizer.tokenize(orig_text)) - - start_position = tok_text.find(pred_text) - if start_position == -1: - if verbose_logging: - logger.info( - "Unable to find text: '%s' in '%s'" % (pred_text, orig_text)) - return orig_text - end_position = start_position + len(pred_text) - 1 - - (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) - (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) - - if len(orig_ns_text) != len(tok_ns_text): - if verbose_logging: - logger.info("Length not equal after stripping spaces: '%s' vs '%s'", - orig_ns_text, tok_ns_text) - return orig_text - - # We then project the characters in `pred_text` back to `orig_text` using - # the character-to-character alignment. - tok_s_to_ns_map = {} - for (i, tok_index) in tok_ns_to_s_map.items(): - tok_s_to_ns_map[tok_index] = i - - orig_start_position = None - if start_position in tok_s_to_ns_map: - ns_start_position = tok_s_to_ns_map[start_position] - if ns_start_position in orig_ns_to_s_map: - orig_start_position = orig_ns_to_s_map[ns_start_position] - - if orig_start_position is None: - if verbose_logging: - logger.info("Couldn't map start position") - return orig_text - - orig_end_position = None - if end_position in tok_s_to_ns_map: - ns_end_position = tok_s_to_ns_map[end_position] - if ns_end_position in orig_ns_to_s_map: - orig_end_position = orig_ns_to_s_map[ns_end_position] - - if orig_end_position is None: - if verbose_logging: - logger.info("Couldn't map end position") - return orig_text - - output_text = orig_text[orig_start_position:(orig_end_position + 1)] - return output_text - - -def _get_best_indexes(logits, n_best_size): - """Get the n-best logits from a list.""" - index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) - - best_indexes = [] - for i in range(len(index_and_score)): - if i >= n_best_size: + if args.max_steps > 0 and global_step > args.max_steps: + train_iterator.close() break - best_indexes.append(index_and_score[i][0]) - return best_indexes + + return global_step, tr_loss / global_step -def _compute_softmax(scores): - """Compute softmax probability over raw logits.""" - if not scores: - return [] +def evaluate(args, model, tokenizer, prefix=""): + dataset, examples, features = load_and_cache_examples(args, tokenizer, evaluate=True, output_examples=True) - max_score = None - for score in scores: - if max_score is None or score > max_score: - max_score = score + if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: + os.makedirs(args.output_dir) + + args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu) + # Note that DistributedSampler samples randomly + eval_sampler = SequentialSampler(dataset) if args.local_rank == -1 else DistributedSampler(dataset) + eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) + + # Eval! 
+ logger.info("***** Running evaluation {} *****".format(prefix)) + logger.info(" Num examples = %d", len(dataset)) + logger.info(" Batch size = %d", args.eval_batch_size) + all_results = [] + for batch in tqdm(eval_dataloader, desc="Evaluating"): + model.eval() + batch = tuple(t.to(args.device) for t in batch) + example_indices = batch[3] + with torch.no_grad(): + inputs = {'input_ids': batch[0], + 'token_type_ids': batch[1] if args.model_type in ['bert', 'xlnet'] else None, # XLM don't use segment_ids + 'attention_mask': batch[2]} + outputs = model(**inputs) + batch_start_logits, batch_end_logits = outputs[:2] + + for i, example_index in enumerate(example_indices): + start_logits = batch_start_logits[i].detach().cpu().tolist() + end_logits = batch_end_logits[i].detach().cpu().tolist() + eval_feature = features[example_index.item()] + unique_id = int(eval_feature.unique_id) + all_results.append(RawResult(unique_id=unique_id, + start_logits=start_logits, + end_logits=end_logits)) + + output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix)) + output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix)) + output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix)) + all_predictions = write_predictions(examples, features, all_results, + args.n_best_size, args.max_answer_length, + args.do_lower_case, output_prediction_file, + output_nbest_file, output_null_log_odds_file, + args.verbose_logging, args.version_2_with_negative, + args.null_score_diff_threshold) + + evaluate_options = EVAL_OPTS(data_file=args.predict_file, + pred_file=output_prediction_file, + na_prob_file=output_null_log_odds_file) + results = evaluate_on_squad(evaluate_options) + return results + + +def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False): + # Load data features from cache or dataset file + input_file = args.predict_file if evaluate else args.train_file + cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format( + 'dev' if evaluate else 'train', + list(filter(None, args.model_name.split('/'))).pop(), + str(args.max_seq_length))) + if os.path.exists(cached_features_file) and not args.overwrite_cache and not output_examples: + logger.info("Loading features from cached file %s", cached_features_file) + features = torch.load(cached_features_file) + else: + logger.info("Creating features from dataset file at %s", input_file) + examples = read_squad_examples(input_file=input_file, + is_training=not evaluate, + version_2_with_negative=args.version_2_with_negative) + features = convert_examples_to_features(examples=examples, + tokenizer=tokenizer, + max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, + max_query_length=args.max_query_length, + is_training=not evaluate) + if args.local_rank in [-1, 0]: + logger.info("Saving features into cached file %s", cached_features_file) + torch.save(features, cached_features_file) + + # Convert to Tensors and build dataset + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long) + all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long) + if evaluate: + all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long) + dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index) + else: + all_start_positions = 
torch.tensor([f.start_position for f in features], dtype=torch.long) + all_end_positions = torch.tensor([f.end_position for f in features], dtype=torch.long) + dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_start_positions, all_end_positions) - exp_scores = [] - total_sum = 0.0 - for score in scores: - x = math.exp(score - max_score) - exp_scores.append(x) - total_sum += x + if output_examples: + return dataset, examples, features + return dataset - probs = [] - for score in exp_scores: - probs.append(score / total_sum) - return probs def main(): parser = argparse.ArgumentParser() ## Required parameters - parser.add_argument("--bert_model", default=None, type=str, required=True, - help="Bert pre-trained model selected in the list: bert-base-uncased, " - "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, " - "bert-base-multilingual-cased, bert-base-chinese.") + parser.add_argument("--train_file", default=None, type=str, required=True, + help="SQuAD json for training. E.g., train-v1.1.json") + parser.add_argument("--predict_file", default=None, type=str, required=True, + help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json") + parser.add_argument("--model_name", default=None, type=str, required=True, + help="Bert/XLNet/XLM pre-trained model selected in the list: " + ", ".join(ALL_MODELS)) parser.add_argument("--output_dir", default=None, type=str, required=True, help="The output directory where the model checkpoints and predictions will be written.") ## Other parameters - parser.add_argument("--train_file", default=None, type=str, help="SQuAD json for training. E.g., train-v1.1.json") - parser.add_argument("--predict_file", default=None, type=str, - help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json") + parser.add_argument("--config_name", default="", type=str, + help="Pretrained config name or path if not the same as model_name") + parser.add_argument("--tokenizer_name", default="", type=str, + help="Pretrained tokenizer name or path if not the same as model_name") + parser.add_argument("--cache_dir", default="", type=str, + help="Where do you want to store the pre-trained models downloaded from s3") + + parser.add_argument('--version_2_with_negative', action='store_true', + help='If true, the SQuAD examples contain some that do not have an answer.') + parser.add_argument('--null_score_diff_threshold', type=float, default=0.0, + help="If null_score - best_non_null is greater than the threshold predict null.") + parser.add_argument("--max_seq_length", default=384, type=int, help="The maximum total input sequence length after WordPiece tokenization. Sequences " "longer than this will be truncated, and sequences shorter than this will be padded.") @@ -783,62 +301,74 @@ def main(): parser.add_argument("--max_query_length", default=64, type=int, help="The maximum number of tokens for the question. 
Questions longer than this will " "be truncated to this length.") - parser.add_argument("--do_train", action='store_true', help="Whether to run training.") - parser.add_argument("--do_predict", action='store_true', help="Whether to run eval on the dev set.") - parser.add_argument("--train_batch_size", default=32, type=int, help="Total batch size for training.") - parser.add_argument("--predict_batch_size", default=8, type=int, help="Total batch size for predictions.") - parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.") + parser.add_argument("--do_train", action='store_true', + help="Whether to run training.") + parser.add_argument("--do_eval", action='store_true', + help="Whether to run eval on the dev set.") + parser.add_argument("--evaluate_during_training", action='store_true', + help="Rul evaluation during training at each logging step.") + parser.add_argument("--do_lower_case", action='store_true', + help="Set this flag if you are using an uncased model.") + + parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, + help="Batch size per GPU/CPU for training.") + parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int, + help="Batch size per GPU/CPU for evaluation.") + parser.add_argument("--learning_rate", default=5e-5, type=float, + help="The initial learning rate for Adam.") + parser.add_argument('--gradient_accumulation_steps', type=int, default=1, + help="Number of updates steps to accumulate before performing a backward/update pass.") + parser.add_argument("--weight_decay", default=0.0, type=float, + help="Weight deay if we apply some.") + parser.add_argument("--adam_epsilon", default=1e-8, type=float, + help="Epsilon for Adam optimizer.") + parser.add_argument("--max_grad_norm", default=1.0, type=float, + help="Max gradient norm.") parser.add_argument("--num_train_epochs", default=3.0, type=float, help="Total number of training epochs to perform.") - parser.add_argument("--warmup_proportion", default=0.1, type=float, - help="Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10%% " - "of training.") + parser.add_argument("--max_steps", default=-1, type=int, + help="If > 0: set total number of training steps to perform. Override num_train_epochs.") + parser.add_argument("--warmup_steps", default=0, type=int, + help="Linear warmup over warmup_steps.") parser.add_argument("--n_best_size", default=20, type=int, - help="The total number of n-best predictions to generate in the nbest_predictions.json " - "output file.") + help="The total number of n-best predictions to generate in the nbest_predictions.json output file.") parser.add_argument("--max_answer_length", default=30, type=int, help="The maximum length of an answer that can be generated. This is needed because the start " "and end predictions are not conditioned on one another.") parser.add_argument("--verbose_logging", action='store_true', help="If true, all of the warnings related to data processing will be printed. 
" "A number of warnings are expected for a normal SQuAD evaluation.") - parser.add_argument("--no_cuda", - action='store_true', + + parser.add_argument('--logging_steps', type=int, default=50, + help="Log every X updates steps.") + parser.add_argument('--save_steps', type=int, default=50, + help="Save checkpoint every X updates steps.") + parser.add_argument("--eval_all_checkpoints", action='store_true', + help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number") + parser.add_argument("--no_cuda", action='store_true', help="Whether not to use CUDA when available") - parser.add_argument('--seed', - type=int, - default=42, + parser.add_argument('--overwrite_output_dir', action='store_true', + help="Overwrite the content of the output directory") + parser.add_argument('--overwrite_cache', action='store_true', + help="Overwrite the cached training and evaluation sets") + parser.add_argument('--seed', type=int, default=42, help="random seed for initialization") - parser.add_argument('--gradient_accumulation_steps', - type=int, - default=1, - help="Number of updates steps to accumulate before performing a backward/update pass.") - parser.add_argument("--do_lower_case", - action='store_true', - help="Whether to lower case the input text. True for uncased models, False for cased models.") - parser.add_argument("--local_rank", - type=int, - default=-1, + + parser.add_argument("--local_rank", type=int, default=-1, help="local_rank for distributed training on gpus") - parser.add_argument('--fp16', - action='store_true', - help="Whether to use 16-bit float precision instead of 32-bit") - parser.add_argument('--loss_scale', - type=float, default=0, - help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n" - "0 (default value): dynamic loss scaling.\n" - "Positive power of 2: static loss scaling value.\n") - parser.add_argument('--version_2_with_negative', - action='store_true', - help='If true, the SQuAD examples contain some that do not have an answer.') - parser.add_argument('--null_score_diff_threshold', - type=float, default=0.0, - help="If null_score - best_non_null is greater than the threshold predict null.") + parser.add_argument('--fp16', action='store_true', + help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit") + parser.add_argument('--fp16_opt_level', type=str, default='O1', + help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." + "See details at https://nvidia.github.io/apex/amp.html") parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.") parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.") args = parser.parse_args() - print(args) + if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir: + raise ValueError("Output directory ({}) already exists and is not empty. 
Use --overwrite_output_dir to overcome.".format(args.output_dir)) + + # Setup distant debugging if needed if args.server_ip and args.server_port: # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script import ptvsd @@ -846,254 +376,101 @@ def main(): ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True) ptvsd.wait_for_attach() + # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - n_gpu = torch.cuda.device_count() - else: + args.n_gpu = torch.cuda.device_count() + else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) - n_gpu = 1 - # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.distributed.init_process_group(backend='nccl') + args.n_gpu = 1 + args.device = device + # Setup logging logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', datefmt = '%m/%d/%Y %H:%M:%S', level = logging.INFO if args.local_rank in [-1, 0] else logging.WARN) + logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", + args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16) - logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format( - device, n_gpu, bool(args.local_rank != -1), args.fp16)) - - if args.gradient_accumulation_steps < 1: - raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( - args.gradient_accumulation_steps)) + # Set seed + set_seed(args) - args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps + # Load pretrained model and tokenizer + if args.local_rank not in [-1, 0]: + torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - random.seed(args.seed) - np.random.seed(args.seed) - torch.manual_seed(args.seed) - if n_gpu > 0: - torch.cuda.manual_seed_all(args.seed) - - if not args.do_train and not args.do_predict: - raise ValueError("At least one of `do_train` or `do_predict` must be True.") - - if args.do_train: - if not args.train_file: - raise ValueError( - "If `do_train` is True, then `train_file` must be specified.") - if args.do_predict: - if not args.predict_file: - raise ValueError( - "If `do_predict` is True, then `predict_file` must be specified.") - - if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train: - raise ValueError("Output directory () already exists and is not empty.") - if not os.path.exists(args.output_dir): - os.makedirs(args.output_dir) - - tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) + args.model_type = "" + for key in MODEL_CLASSES: + if key in args.model_name.lower(): + args.model_type = key # take the first match in model types + break + config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type] + config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name) + tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name, do_lower_case=args.do_lower_case) + model = model_class.from_pretrained(args.model_name, from_tf=bool('.ckpt' in args.model_name), config=config) - # Prepare 
model - model = BertForQuestionAnswering.from_pretrained(args.bert_model, - cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(args.local_rank))) + if args.local_rank == 0: + torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - if args.fp16: - model.half() - model.to(device) + # Distributed and parrallel training + model.to(args.device) if args.local_rank != -1: - try: - from apex.parallel import DistributedDataParallel as DDP - except ImportError: - raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") - - model = DDP(model) - elif n_gpu > 1: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], + output_device=args.local_rank, + find_unused_parameters=True) + elif args.n_gpu > 1: model = torch.nn.DataParallel(model) + logger.info("Training/evaluation parameters %s", args) + + # Training if args.do_train: + train_dataset = load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False) + global_step, tr_loss = train(args, train_dataset, model, tokenizer) + logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) - # Prepare data loader - train_examples = read_squad_examples( - input_file=args.train_file, is_training=True, version_2_with_negative=args.version_2_with_negative) - cached_train_features_file = args.train_file+'_{0}_{1}_{2}_{3}'.format( - list(filter(None, args.bert_model.split('/'))).pop(), str(args.max_seq_length), str(args.doc_stride), str(args.max_query_length)) - try: - with open(cached_train_features_file, "rb") as reader: - train_features = pickle.load(reader) - except: - train_features = convert_examples_to_features( - examples=train_examples, - tokenizer=tokenizer, - max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, - max_query_length=args.max_query_length, - is_training=True) - if args.local_rank == -1 or torch.distributed.get_rank() == 0: - logger.info(" Saving train features into cached file %s", cached_train_features_file) - with open(cached_train_features_file, "wb") as writer: - pickle.dump(train_features, writer) - all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) - all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) - all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) - all_start_positions = torch.tensor([f.start_position for f in train_features], dtype=torch.long) - all_end_positions = torch.tensor([f.end_position for f in train_features], dtype=torch.long) - train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, - all_start_positions, all_end_positions) - if args.local_rank == -1: - train_sampler = RandomSampler(train_data) - else: - train_sampler = DistributedSampler(train_data) - train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size) - num_train_optimization_steps = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs - if args.local_rank != -1: - num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size() - - # Prepare optimizer - - param_optimizer = list(model.named_parameters()) - - # hack to remove pooler, which is not used - # thus it produce None grad that break apex - param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]] - - no_decay = ['bias', 
'LayerNorm.bias', 'LayerNorm.weight'] - optimizer_grouped_parameters = [ - {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01}, - {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} - ] - - if args.fp16: - try: - from apex.optimizers import FP16_Optimizer - from apex.optimizers import FusedAdam - except ImportError: - raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") - - optimizer = FusedAdam(optimizer_grouped_parameters, - lr=args.learning_rate, - bias_correction=False, - max_grad_norm=1.0) - if args.loss_scale == 0: - optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True) - else: - optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale) - warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion, - t_total=num_train_optimization_steps) - else: - optimizer = BertAdam(optimizer_grouped_parameters, - lr=args.learning_rate, - warmup=args.warmup_proportion, - t_total=num_train_optimization_steps) - - global_step = 0 - - logger.info("***** Running training *****") - logger.info(" Num orig examples = %d", len(train_examples)) - logger.info(" Num split examples = %d", len(train_features)) - logger.info(" Batch size = %d", args.train_batch_size) - logger.info(" Num steps = %d", num_train_optimization_steps) - - model.train() - for _ in trange(int(args.num_train_epochs), desc="Epoch"): - for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])): - if n_gpu == 1: - batch = tuple(t.to(device) for t in batch) # multi-gpu does scattering it-self - input_ids, input_mask, segment_ids, start_positions, end_positions = batch - loss = model(input_ids, segment_ids, input_mask, start_positions, end_positions) - if n_gpu > 1: - loss = loss.mean() # mean() to average on multi-gpu. 
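-                # Why the division below: backward() on each micro-batch sums
-                # gradients into the same .grad buffers, so scaling each loss by
-                # 1/gradient_accumulation_steps makes the accumulated gradient
-                # equal to that of one large batch before optimizer.step() runs.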
- if args.gradient_accumulation_steps > 1: - loss = loss / args.gradient_accumulation_steps - - if args.fp16: - optimizer.backward(loss) - else: - loss.backward() - if (step + 1) % args.gradient_accumulation_steps == 0: - if args.fp16: - # modify learning rate with special warm up BERT uses - # if args.fp16 is False, BertAdam is used and handles this automatically - lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion) - for param_group in optimizer.param_groups: - param_group['lr'] = lr_this_step - optimizer.step() - optimizer.zero_grad() - global_step += 1 - - if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0): - # Save a trained model, configuration and tokenizer - model_to_save = model.module if hasattr(model, 'module') else model # Only save the model it-self - - # If we save using the predefined names, we can load using `from_pretrained` - output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME) - output_config_file = os.path.join(args.output_dir, CONFIG_NAME) - - torch.save(model_to_save.state_dict(), output_model_file) - model_to_save.config.to_json_file(output_config_file) - tokenizer.save_vocabulary(args.output_dir) + # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained() + if args.local_rank == -1 or torch.distributed.get_rank() == 0: + # Create output directory if needed + if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: + os.makedirs(args.output_dir) - # Load a trained model and vocabulary that you have fine-tuned - model = BertForQuestionAnswering.from_pretrained(args.output_dir) - tokenizer = BertTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case) - else: - model = BertForQuestionAnswering.from_pretrained(args.bert_model) - - model.to(device) - - if args.do_predict and (args.local_rank == -1 or torch.distributed.get_rank() == 0): - eval_examples = read_squad_examples( - input_file=args.predict_file, is_training=False, version_2_with_negative=args.version_2_with_negative) - eval_features = convert_examples_to_features( - examples=eval_examples, - tokenizer=tokenizer, - max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, - max_query_length=args.max_query_length, - is_training=False) - - logger.info("***** Running predictions *****") - logger.info(" Num orig examples = %d", len(eval_examples)) - logger.info(" Num split examples = %d", len(eval_features)) - logger.info(" Batch size = %d", args.predict_batch_size) - - all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long) - all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long) - all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long) - all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long) - eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index) - # Run prediction for full data - eval_sampler = SequentialSampler(eval_data) - eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.predict_batch_size) + logger.info("Saving model checkpoint to %s", args.output_dir) + # Save a trained model, configuration and tokenizer using `save_pretrained()`. 
+ # They can then be reloaded using `from_pretrained()` + model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training + model_to_save.save_pretrained(args.output_dir) + tokenizer.save_pretrained(args.output_dir) - model.eval() - all_results = [] - logger.info("Start evaluating") - for input_ids, input_mask, segment_ids, example_indices in tqdm(eval_dataloader, desc="Evaluating", disable=args.local_rank not in [-1, 0]): - if len(all_results) % 1000 == 0: - logger.info("Processing example: %d" % (len(all_results))) - input_ids = input_ids.to(device) - input_mask = input_mask.to(device) - segment_ids = segment_ids.to(device) - with torch.no_grad(): - batch_start_logits, batch_end_logits = model(input_ids, segment_ids, input_mask) - for i, example_index in enumerate(example_indices): - start_logits = batch_start_logits[i].detach().cpu().tolist() - end_logits = batch_end_logits[i].detach().cpu().tolist() - eval_feature = eval_features[example_index.item()] - unique_id = int(eval_feature.unique_id) - all_results.append(RawResult(unique_id=unique_id, - start_logits=start_logits, - end_logits=end_logits)) - output_prediction_file = os.path.join(args.output_dir, "predictions.json") - output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json") - output_null_log_odds_file = os.path.join(args.output_dir, "null_odds.json") - write_predictions(eval_examples, eval_features, all_results, - args.n_best_size, args.max_answer_length, - args.do_lower_case, output_prediction_file, - output_nbest_file, output_null_log_odds_file, args.verbose_logging, - args.version_2_with_negative, args.null_score_diff_threshold) + # Good practice: save your training arguments together with the trained model + torch.save(args, os.path.join(args.output_dir, 'training_args.bin')) + + # Load a trained model and vocabulary that you have fine-tuned + model = model_class.from_pretrained(args.output_dir) + tokenizer = tokenizer_class.from_pretrained(args.output_dir) + model.to(args.device) + + + # Evaluation + results = {} + if args.do_eval and args.local_rank in [-1, 0]: + checkpoints = [args.output_dir] + if args.eval_all_checkpoints: + checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True))) + logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN) # Reduce logging + logger.info("Evaluate the following checkpoints: %s", checkpoints) + for checkpoint in checkpoints: + global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else "" + model = model_class.from_pretrained(checkpoint) + model.to(args.device) + result = evaluate(args, model, tokenizer, prefix=global_step) + result = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in result.items()) + results.update(result) + logger.info("Results: {}".format(results)) + return results if __name__ == "__main__": diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py new file mode 100644 index 00000000..305eeb7b --- /dev/null +++ b/cdqa/reader/utils_squad.py @@ -0,0 +1,743 @@ + +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Load SQuAD dataset. """ + +from __future__ import absolute_import, division, print_function + +import json +import logging +import math +import collections +from io import open + +from pytorch_transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize + +logger = logging.getLogger(__name__) + + +class SquadExample(object): + """ + A single training/test example for the Squad dataset. + For examples without an answer, the start and end position are -1. + """ + + def __init__(self, + qas_id, + question_text, + doc_tokens, + orig_answer_text=None, + start_position=None, + end_position=None, + is_impossible=None): + self.qas_id = qas_id + self.question_text = question_text + self.doc_tokens = doc_tokens + self.orig_answer_text = orig_answer_text + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + def __str__(self): + return self.__repr__() + + def __repr__(self): + s = "" + s += "qas_id: %s" % (self.qas_id) + s += ", question_text: %s" % ( + self.question_text) + s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) + if self.start_position: + s += ", start_position: %d" % (self.start_position) + if self.end_position: + s += ", end_position: %d" % (self.end_position) + if self.is_impossible: + s += ", is_impossible: %r" % (self.is_impossible) + return s + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, + unique_id, + example_index, + doc_span_index, + tokens, + token_to_orig_map, + token_is_max_context, + input_ids, + input_mask, + segment_ids, + start_position=None, + end_position=None, + is_impossible=None): + self.unique_id = unique_id + self.example_index = example_index + self.doc_span_index = doc_span_index + self.tokens = tokens + self.token_to_orig_map = token_to_orig_map + self.token_is_max_context = token_is_max_context + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + +def read_squad_examples(input_file, is_training, version_2_with_negative): + """Read a SQuAD json file into a list of SquadExample.""" + with open(input_file, "r", encoding='utf-8') as reader: + input_data = json.load(reader)["data"] + + def is_whitespace(c): + if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: + return True + return False + + examples = [] + for entry in input_data: + for paragraph in entry["paragraphs"]: + paragraph_text = paragraph["context"] + doc_tokens = [] + char_to_word_offset = [] + prev_is_whitespace = True + for c in paragraph_text: + if is_whitespace(c): + prev_is_whitespace = True + else: + if prev_is_whitespace: + doc_tokens.append(c) + else: + doc_tokens[-1] += c + prev_is_whitespace = False + char_to_word_offset.append(len(doc_tokens) - 1) + + for qa in paragraph["qas"]: + qas_id = qa["id"] + question_text = qa["question"] + start_position = None + end_position = None + orig_answer_text = None + is_impossible = False + if is_training: + if version_2_with_negative: + 
is_impossible = qa["is_impossible"]
+ if (len(qa["answers"]) != 1) and (not is_impossible):
+ raise ValueError(
+ "For training, each question should have exactly 1 answer.")
+ if not is_impossible:
+ answer = qa["answers"][0]
+ orig_answer_text = answer["text"]
+ answer_offset = answer["answer_start"]
+ answer_length = len(orig_answer_text)
+ start_position = char_to_word_offset[answer_offset]
+ end_position = char_to_word_offset[answer_offset + answer_length - 1]
+ # Only add answers where the text can be exactly recovered from the
+ # document. If this CAN'T happen it's likely due to weird Unicode
+ # stuff so we will just skip the example.
+ #
+ # Note that this means for training mode, every example is NOT
+ # guaranteed to be preserved.
+ actual_text = " ".join(doc_tokens[start_position:(end_position + 1)])
+ cleaned_answer_text = " ".join(
+ whitespace_tokenize(orig_answer_text))
+ if actual_text.find(cleaned_answer_text) == -1:
+ logger.warning("Could not find answer: '%s' vs. '%s'",
+ actual_text, cleaned_answer_text)
+ continue
+ else:
+ start_position = -1
+ end_position = -1
+ orig_answer_text = ""
+
+ example = SquadExample(
+ qas_id=qas_id,
+ question_text=question_text,
+ doc_tokens=doc_tokens,
+ orig_answer_text=orig_answer_text,
+ start_position=start_position,
+ end_position=end_position,
+ is_impossible=is_impossible)
+ examples.append(example)
+ return examples
+
+
+def convert_examples_to_features(examples, tokenizer, max_seq_length,
+ doc_stride, max_query_length, is_training):
+ """Loads a data file into a list of `InputBatch`s."""
+
+ unique_id = 1000000000
+
+ features = []
+ for (example_index, example) in enumerate(examples):
+ query_tokens = tokenizer.tokenize(example.question_text)
+
+ if len(query_tokens) > max_query_length:
+ query_tokens = query_tokens[0:max_query_length]
+
+ tok_to_orig_index = []
+ orig_to_tok_index = []
+ all_doc_tokens = []
+ for (i, token) in enumerate(example.doc_tokens):
+ orig_to_tok_index.append(len(all_doc_tokens))
+ sub_tokens = tokenizer.tokenize(token)
+ for sub_token in sub_tokens:
+ tok_to_orig_index.append(i)
+ all_doc_tokens.append(sub_token)
+
+ tok_start_position = None
+ tok_end_position = None
+ if is_training and example.is_impossible:
+ tok_start_position = -1
+ tok_end_position = -1
+ if is_training and not example.is_impossible:
+ tok_start_position = orig_to_tok_index[example.start_position]
+ if example.end_position < len(example.doc_tokens) - 1:
+ tok_end_position = orig_to_tok_index[example.end_position + 1] - 1
+ else:
+ tok_end_position = len(all_doc_tokens) - 1
+ (tok_start_position, tok_end_position) = _improve_answer_span(
+ all_doc_tokens, tok_start_position, tok_end_position, tokenizer,
+ example.orig_answer_text)
+
+ # The -3 accounts for [CLS], [SEP] and [SEP]
+ max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
+
+ # We can have documents that are longer than the maximum sequence length.
+ # To deal with this we do a sliding window approach, where we take chunks
+ # of up to our max length with a stride of `doc_stride`.
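Editor's note: before the windowing code below, here is a toy rerun of the character-to-word bookkeeping from read_squad_examples above (simplified: the 0x202F check is folded into a plain membership test; context and answer are invented):

context = "BERT was released in 2018."
doc_tokens, char_to_word_offset = [], []
prev_is_whitespace = True
for c in context:
    if c in " \t\r\n":  # simplified is_whitespace()
        prev_is_whitespace = True
    else:
        if prev_is_whitespace:
            doc_tokens.append(c)
        else:
            doc_tokens[-1] += c
        prev_is_whitespace = False
    char_to_word_offset.append(len(doc_tokens) - 1)

answer_text, answer_offset = "2018", context.index("2018")
start_position = char_to_word_offset[answer_offset]
end_position = char_to_word_offset[answer_offset + len(answer_text) - 1]
assert doc_tokens == ['BERT', 'was', 'released', 'in', '2018.']
assert (start_position, end_position) == (4, 4)  # answer maps to the word '2018.'

This also shows why the exact-recovery check above uses find() rather than equality: the recovered word is '2018.' with a trailing period, while the cleaned answer is '2018'.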
+ _DocSpan = collections.namedtuple( # pylint: disable=invalid-name + "DocSpan", ["start", "length"]) + doc_spans = [] + start_offset = 0 + while start_offset < len(all_doc_tokens): + length = len(all_doc_tokens) - start_offset + if length > max_tokens_for_doc: + length = max_tokens_for_doc + doc_spans.append(_DocSpan(start=start_offset, length=length)) + if start_offset + length == len(all_doc_tokens): + break + start_offset += min(length, doc_stride) + + for (doc_span_index, doc_span) in enumerate(doc_spans): + tokens = [] + token_to_orig_map = {} + token_is_max_context = {} + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in query_tokens: + tokens.append(token) + segment_ids.append(0) + tokens.append("[SEP]") + segment_ids.append(0) + + for i in range(doc_span.length): + split_token_index = doc_span.start + i + token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] + + is_max_context = _check_is_max_context(doc_spans, doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + start_position = None + end_position = None + if is_training and not example.is_impossible: + # For training, if our document chunk does not contain an annotation + # we throw it out, since there is nothing to predict. 
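Editor's note: a compact restatement, with invented tokens, of the [CLS]/[SEP] packing and segment-id layout assembled above (padding step omitted; the out-of-span handling described in the comment above continues just below):

query_tokens = ['what', 'year', '?']
span_tokens = ['released', 'in', '2018']
tokens = ['[CLS]'] + query_tokens + ['[SEP]'] + span_tokens + ['[SEP]']
segment_ids = [0] * (len(query_tokens) + 2) + [1] * (len(span_tokens) + 1)
input_mask = [1] * len(tokens)  # 1 = real token, 0 = padding
assert len(tokens) == len(segment_ids) == len(input_mask) == 9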
+ doc_start = doc_span.start
+ doc_end = doc_span.start + doc_span.length - 1
+ out_of_span = False
+ if not (tok_start_position >= doc_start and
+ tok_end_position <= doc_end):
+ out_of_span = True
+ if out_of_span:
+ start_position = 0
+ end_position = 0
+ else:
+ doc_offset = len(query_tokens) + 2
+ start_position = tok_start_position - doc_start + doc_offset
+ end_position = tok_end_position - doc_start + doc_offset
+ if is_training and example.is_impossible:
+ start_position = 0
+ end_position = 0
+ if example_index < 20:
+ logger.info("*** Example ***")
+ logger.info("unique_id: %s" % (unique_id))
+ logger.info("example_index: %s" % (example_index))
+ logger.info("doc_span_index: %s" % (doc_span_index))
+ logger.info("tokens: %s" % " ".join(tokens))
+ logger.info("token_to_orig_map: %s" % " ".join([
+ "%d:%d" % (x, y) for (x, y) in token_to_orig_map.items()]))
+ logger.info("token_is_max_context: %s" % " ".join([
+ "%d:%s" % (x, y) for (x, y) in token_is_max_context.items()
+ ]))
+ logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ logger.info(
+ "input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ logger.info(
+ "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+ if is_training and example.is_impossible:
+ logger.info("impossible example")
+ if is_training and not example.is_impossible:
+ answer_text = " ".join(tokens[start_position:(end_position + 1)])
+ logger.info("start_position: %d" % (start_position))
+ logger.info("end_position: %d" % (end_position))
+ logger.info(
+ "answer: %s" % (answer_text))
+
+ features.append(
+ InputFeatures(
+ unique_id=unique_id,
+ example_index=example_index,
+ doc_span_index=doc_span_index,
+ tokens=tokens,
+ token_to_orig_map=token_to_orig_map,
+ token_is_max_context=token_is_max_context,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids,
+ start_position=start_position,
+ end_position=end_position,
+ is_impossible=example.is_impossible))
+ unique_id += 1
+
+ return features
+
+
+def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
+ orig_answer_text):
+ """Returns tokenized answer spans that better match the annotated answer."""
+
+ # The SQuAD annotations are character based. We first project them to
+ # whitespace-tokenized words. But then after WordPiece tokenization, we can
+ # often find a "better match". For example:
+ #
+ # Question: What year was John Smith born?
+ # Context: The leader was John Smith (1895-1943).
+ # Answer: 1895
+ #
+ # The original whitespace-tokenized answer will be "(1895-1943).". However
+ # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
+ # the exact answer, 1895.
+ #
+ # However, this is not always possible. Consider the following:
+ #
+ # Question: What country is the top exporter of electronics?
+ # Context: The Japanese electronics industry is the largest in the world.
+ # Answer: Japan
+ #
+ # In this case, the annotator chose "Japan" as a character sub-span of
+ # the word "Japanese". Since our WordPiece tokenizer does not split
+ # "Japanese", we just use "Japanese" as the annotation. This is fairly rare
+ # in SQuAD, but does happen.
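Editor's note: the comment block above motivates _improve_answer_span, whose body follows. A self-contained rerun of the "(1895-1943)." example, with a fake punctuation-splitting tokenizer standing in for WordPiece:

import re

def tokenize(text):
    # fake sub-word tokenizer: lowercases and splits off punctuation
    return re.findall(r"\w+|[^\w\s]", text.lower())

def improve_span(tokens, start, end, answer):
    # same nested search as _improve_answer_span below
    target = " ".join(tokenize(answer))
    for new_start in range(start, end + 1):
        for new_end in range(end, new_start - 1, -1):
            if " ".join(tokens[new_start:new_end + 1]) == target:
                return new_start, new_end
    return start, end

all_doc_tokens = tokenize("The leader was John Smith (1895-1943).")
# ['the', 'leader', 'was', 'john', 'smith', '(', '1895', '-', '1943', ')', '.']
assert improve_span(all_doc_tokens, 5, 10, "1895") == (6, 6)  # tightened to '1895'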
+ tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) + + for new_start in range(input_start, input_end + 1): + for new_end in range(input_end, new_start - 1, -1): + text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) + if text_span == tok_answer_text: + return (new_start, new_end) + + return (input_start, input_end) + + +def _check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token.""" + + # Because of the sliding window approach taken to scoring documents, a single + # token can appear in multiple documents. E.g. + # Doc: the man went to the store and bought a gallon of milk + # Span A: the man went to the + # Span B: to the store and bought + # Span C: and bought a gallon of + # ... + # + # Now the word 'bought' will have two scores from spans B and C. We only + # want to consider the score with "maximum context", which we define as + # the *minimum* of its left and right context (the *sum* of left and + # right context will always be the same, of course). + # + # In the example the maximum context for 'bought' would be span C since + # it has 1 left context and 3 right context, while span B has 4 left context + # and 0 right context. + best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, num_right_context) + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index + + +RawResult = collections.namedtuple("RawResult", + ["unique_id", "start_logits", "end_logits"]) + + +def write_predictions(all_examples, all_features, all_results, n_best_size, + max_answer_length, do_lower_case, output_prediction_file, + output_nbest_file, output_null_log_odds_file, verbose_logging, + version_2_with_negative, null_score_diff_threshold): + """Write final predictions to the json file and log-odds of null if needed.""" + logger.info("Writing predictions to: %s" % (output_prediction_file)) + logger.info("Writing nbest to: %s" % (output_nbest_file)) + + example_index_to_features = collections.defaultdict(list) + for feature in all_features: + example_index_to_features[feature.example_index].append(feature) + + unique_id_to_result = {} + for result in all_results: + unique_id_to_result[result.unique_id] = result + + _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name + "PrelimPrediction", + ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]) + + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + scores_diff_json = collections.OrderedDict() + + for (example_index, example) in enumerate(all_examples): + features = example_index_to_features[example_index] + + prelim_predictions = [] + # keep track of the minimum score of null start+end of position 0 + score_null = 1000000 # large and positive + min_null_feature_index = 0 # the paragraph slice with min null score + null_start_logit = 0 # the start logit at the slice with min null score + null_end_logit = 0 # the end logit at the slice with min null score + for (feature_index, feature) in enumerate(features): + result = unique_id_to_result[feature.unique_id] + start_indexes = _get_best_indexes(result.start_logits, 
n_best_size) + end_indexes = _get_best_indexes(result.end_logits, n_best_size) + # if we could have irrelevant answers, get the min score of irrelevant + if version_2_with_negative: + feature_null_score = result.start_logits[0] + result.end_logits[0] + if feature_null_score < score_null: + score_null = feature_null_score + min_null_feature_index = feature_index + null_start_logit = result.start_logits[0] + null_end_logit = result.end_logits[0] + for start_index in start_indexes: + for end_index in end_indexes: + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. + if start_index >= len(feature.tokens): + continue + if end_index >= len(feature.tokens): + continue + if start_index not in feature.token_to_orig_map: + continue + if end_index not in feature.token_to_orig_map: + continue + if not feature.token_is_max_context.get(start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > max_answer_length: + continue + prelim_predictions.append( + _PrelimPrediction( + feature_index=feature_index, + start_index=start_index, + end_index=end_index, + start_logit=result.start_logits[start_index], + end_logit=result.end_logits[end_index])) + if version_2_with_negative: + prelim_predictions.append( + _PrelimPrediction( + feature_index=min_null_feature_index, + start_index=0, + end_index=0, + start_logit=null_start_logit, + end_logit=null_end_logit)) + prelim_predictions = sorted( + prelim_predictions, + key=lambda x: (x.start_logit + x.end_logit), + reverse=True) + + _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name + "NbestPrediction", ["text", "start_logit", "end_logit"]) + + seen_predictions = {} + nbest = [] + for pred in prelim_predictions: + if len(nbest) >= n_best_size: + break + feature = features[pred.feature_index] + if pred.start_index > 0: # this is a non-null prediction + tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + orig_doc_start = feature.token_to_orig_map[pred.start_index] + orig_doc_end = feature.token_to_orig_map[pred.end_index] + orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + tok_text = " ".join(tok_tokens) + + # De-tokenize WordPieces that have been split off. + tok_text = tok_text.replace(" ##", "") + tok_text = tok_text.replace("##", "") + + # Clean whitespace + tok_text = tok_text.strip() + tok_text = " ".join(tok_text.split()) + orig_text = " ".join(orig_tokens) + + final_text = get_final_text(tok_text, orig_text, do_lower_case, verbose_logging) + if final_text in seen_predictions: + continue + + seen_predictions[final_text] = True + else: + final_text = "" + seen_predictions[final_text] = True + + nbest.append( + _NbestPrediction( + text=final_text, + start_logit=pred.start_logit, + end_logit=pred.end_logit)) + # if we didn't include the empty option in the n-best, include it + if version_2_with_negative: + if "" not in seen_predictions: + nbest.append( + _NbestPrediction( + text="", + start_logit=null_start_logit, + end_logit=null_end_logit)) + + # In very rare edge cases we could only have single null prediction. + # So we just create a nonce prediction in this case to avoid failure. + if len(nbest)==1: + nbest.insert(0, + _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + + # In very rare edge cases we could have no valid predictions. So we + # just create a nonce prediction in this case to avoid failure. 
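Editor's note: earlier in this hunk, each candidate span's WordPiece tokens are merged back into words before alignment with the original text. A tiny illustration of that "##" stripping, with invented sub-tokens (the no-valid-predictions fallback described above continues right below):

tok_tokens = ['den', '##ver', 'bron', '##cos']
tok_text = " ".join(tok_tokens)                    # 'den ##ver bron ##cos'
tok_text = tok_text.replace(" ##", "").replace("##", "")
tok_text = " ".join(tok_text.strip().split())      # normalize whitespace
assert tok_text == "denver broncos"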
+ if not nbest: + nbest.append( + _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + + assert len(nbest) >= 1 + + total_scores = [] + best_non_null_entry = None + for entry in nbest: + total_scores.append(entry.start_logit + entry.end_logit) + if not best_non_null_entry: + if entry.text: + best_non_null_entry = entry + + probs = _compute_softmax(total_scores) + + nbest_json = [] + for (i, entry) in enumerate(nbest): + output = collections.OrderedDict() + output["text"] = entry.text + output["probability"] = probs[i] + output["start_logit"] = entry.start_logit + output["end_logit"] = entry.end_logit + nbest_json.append(output) + + assert len(nbest_json) >= 1 + + if not version_2_with_negative: + all_predictions[example.qas_id] = nbest_json[0]["text"] + else: + # predict "" iff the null score - the score of best non-null > threshold + score_diff = score_null - best_non_null_entry.start_logit - ( + best_non_null_entry.end_logit) + scores_diff_json[example.qas_id] = score_diff + if score_diff > null_score_diff_threshold: + all_predictions[example.qas_id] = "" + else: + all_predictions[example.qas_id] = best_non_null_entry.text + all_nbest_json[example.qas_id] = nbest_json + + with open(output_prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") + + with open(output_nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + "\n") + + if version_2_with_negative: + with open(output_null_log_odds_file, "w") as writer: + writer.write(json.dumps(scores_diff_json, indent=4) + "\n") + + return all_predictions + + +def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): + """Project the tokenized prediction back to the original text.""" + + # When we created the data, we kept track of the alignment between original + # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So + # now `orig_text` contains the span of our original text corresponding to the + # span that we predicted. + # + # However, `orig_text` may contain extra characters that we don't want in + # our prediction. + # + # For example, let's say: + # pred_text = steve smith + # orig_text = Steve Smith's + # + # We don't want to return `orig_text` because it contains the extra "'s". + # + # We don't want to return `pred_text` because it's already been normalized + # (the SQuAD eval script also does punctuation stripping/lower casing but + # our tokenizer does additional normalization like stripping accent + # characters). + # + # What we really want to return is "Steve Smith". + # + # Therefore, we have to apply a semi-complicated alignment heuristic between + # `pred_text` and `orig_text` to get a character-to-character alignment. This + # can fail in certain cases in which case we just return `orig_text`. + + def _strip_spaces(text): + ns_chars = [] + ns_to_s_map = collections.OrderedDict() + for (i, c) in enumerate(text): + if c == " ": + continue + ns_to_s_map[len(ns_chars)] = i + ns_chars.append(c) + ns_text = "".join(ns_chars) + return (ns_text, ns_to_s_map) + + # We first tokenize `orig_text`, strip whitespace from the result + # and `pred_text`, and check if they are the same length. If they are + # NOT the same length, the heuristic has failed. If they are the same + # length, we assume the characters are one-to-one aligned. 
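Editor's note: the heuristic described above hinges on _strip_spaces, which maps each character of the de-spaced string back to its index in the original. A toy run on the "Steve Smith's" example:

import collections

def strip_spaces(text):
    # same mapping as the _strip_spaces helper above
    ns_chars, ns_to_s_map = [], collections.OrderedDict()
    for i, c in enumerate(text):
        if c == " ":
            continue
        ns_to_s_map[len(ns_chars)] = i
        ns_chars.append(c)
    return "".join(ns_chars), ns_to_s_map

ns_text, ns_to_s_map = strip_spaces("Steve Smith's")
assert ns_text == "SteveSmith's"
assert ns_to_s_map[5] == 6  # stripped index 5 ('S' of Smith) -> original index 6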
+ tokenizer = BasicTokenizer(do_lower_case=do_lower_case) + + tok_text = " ".join(tokenizer.tokenize(orig_text)) + + start_position = tok_text.find(pred_text) + if start_position == -1: + if verbose_logging: + logger.info( + "Unable to find text: '%s' in '%s'" % (pred_text, orig_text)) + return orig_text + end_position = start_position + len(pred_text) - 1 + + (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) + (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) + + if len(orig_ns_text) != len(tok_ns_text): + if verbose_logging: + logger.info("Length not equal after stripping spaces: '%s' vs '%s'", + orig_ns_text, tok_ns_text) + return orig_text + + # We then project the characters in `pred_text` back to `orig_text` using + # the character-to-character alignment. + tok_s_to_ns_map = {} + for (i, tok_index) in tok_ns_to_s_map.items(): + tok_s_to_ns_map[tok_index] = i + + orig_start_position = None + if start_position in tok_s_to_ns_map: + ns_start_position = tok_s_to_ns_map[start_position] + if ns_start_position in orig_ns_to_s_map: + orig_start_position = orig_ns_to_s_map[ns_start_position] + + if orig_start_position is None: + if verbose_logging: + logger.info("Couldn't map start position") + return orig_text + + orig_end_position = None + if end_position in tok_s_to_ns_map: + ns_end_position = tok_s_to_ns_map[end_position] + if ns_end_position in orig_ns_to_s_map: + orig_end_position = orig_ns_to_s_map[ns_end_position] + + if orig_end_position is None: + if verbose_logging: + logger.info("Couldn't map end position") + return orig_text + + output_text = orig_text[orig_start_position:(orig_end_position + 1)] + return output_text + + +def _get_best_indexes(logits, n_best_size): + """Get the n-best logits from a list.""" + index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) + + best_indexes = [] + for i in range(len(index_and_score)): + if i >= n_best_size: + break + best_indexes.append(index_and_score[i][0]) + return best_indexes + + +def _compute_softmax(scores): + """Compute softmax probability over raw logits.""" + if not scores: + return [] + + max_score = None + for score in scores: + if max_score is None or score > max_score: + max_score = score + + exp_scores = [] + total_sum = 0.0 + for score in scores: + x = math.exp(score - max_score) + exp_scores.append(x) + total_sum += x + + probs = [] + for score in exp_scores: + probs.append(score / total_sum) + return probs diff --git a/cdqa/reader/utils_squad_evaluate.py b/cdqa/reader/utils_squad_evaluate.py new file mode 100644 index 00000000..d0cf643f --- /dev/null +++ b/cdqa/reader/utils_squad_evaluate.py @@ -0,0 +1,289 @@ +"""Official evaluation script for SQuAD version 2.0. + +In addition to basic functionality, we also compute additional statistics and +plot precision-recall curves if an additional na_prob.json file is provided. +This file is expected to map question ID's to the model's predicted probability +that a question is unanswerable. 
+""" +import argparse +import collections +import json +import numpy as np +import os +import re +import string +import sys + +class EVAL_OPTS(): + def __init__(self, data_file, pred_file, out_file="", + na_prob_file="na_prob.json", na_prob_thresh=1.0, + out_image_dir=None, verbose=False): + self.data_file = data_file + self.pred_file = pred_file + self.out_file = out_file + self.na_prob_file = na_prob_file + self.na_prob_thresh = na_prob_thresh + self.out_image_dir = out_image_dir + self.verbose = verbose + +OPTS = None + +def parse_args(): + parser = argparse.ArgumentParser('Official evaluation script for SQuAD version 2.0.') + parser.add_argument('data_file', metavar='data.json', help='Input data JSON file.') + parser.add_argument('pred_file', metavar='pred.json', help='Model predictions.') + parser.add_argument('--out-file', '-o', metavar='eval.json', + help='Write accuracy metrics to file (default is stdout).') + parser.add_argument('--na-prob-file', '-n', metavar='na_prob.json', + help='Model estimates of probability of no answer.') + parser.add_argument('--na-prob-thresh', '-t', type=float, default=1.0, + help='Predict "" if no-answer probability exceeds this (default = 1.0).') + parser.add_argument('--out-image-dir', '-p', metavar='out_images', default=None, + help='Save precision-recall curves to directory.') + parser.add_argument('--verbose', '-v', action='store_true') + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + return parser.parse_args() + +def make_qid_to_has_ans(dataset): + qid_to_has_ans = {} + for article in dataset: + for p in article['paragraphs']: + for qa in p['qas']: + qid_to_has_ans[qa['id']] = bool(qa['answers']) + return qid_to_has_ans + +def normalize_answer(s): + """Lower text and remove punctuation, articles and extra whitespace.""" + def remove_articles(text): + regex = re.compile(r'\b(a|an|the)\b', re.UNICODE) + return re.sub(regex, ' ', text) + def white_space_fix(text): + return ' '.join(text.split()) + def remove_punc(text): + exclude = set(string.punctuation) + return ''.join(ch for ch in text if ch not in exclude) + def lower(text): + return text.lower() + return white_space_fix(remove_articles(remove_punc(lower(s)))) + +def get_tokens(s): + if not s: return [] + return normalize_answer(s).split() + +def compute_exact(a_gold, a_pred): + return int(normalize_answer(a_gold) == normalize_answer(a_pred)) + +def compute_f1(a_gold, a_pred): + gold_toks = get_tokens(a_gold) + pred_toks = get_tokens(a_pred) + common = collections.Counter(gold_toks) & collections.Counter(pred_toks) + num_same = sum(common.values()) + if len(gold_toks) == 0 or len(pred_toks) == 0: + # If either is no-answer, then F1 is 1 if they agree, 0 otherwise + return int(gold_toks == pred_toks) + if num_same == 0: + return 0 + precision = 1.0 * num_same / len(pred_toks) + recall = 1.0 * num_same / len(gold_toks) + f1 = (2 * precision * recall) / (precision + recall) + return f1 + +def get_raw_scores(dataset, preds): + exact_scores = {} + f1_scores = {} + for article in dataset: + for p in article['paragraphs']: + for qa in p['qas']: + qid = qa['id'] + gold_answers = [a['text'] for a in qa['answers'] + if normalize_answer(a['text'])] + if not gold_answers: + # For unanswerable questions, only correct answer is empty string + gold_answers = [''] + if qid not in preds: + print('Missing prediction for %s' % qid) + continue + a_pred = preds[qid] + # Take max over all gold answers + exact_scores[qid] = max(compute_exact(a, a_pred) for a in gold_answers) + f1_scores[qid] = 
max(compute_f1(a, a_pred) for a in gold_answers) + return exact_scores, f1_scores + +def apply_no_ans_threshold(scores, na_probs, qid_to_has_ans, na_prob_thresh): + new_scores = {} + for qid, s in scores.items(): + pred_na = na_probs[qid] > na_prob_thresh + if pred_na: + new_scores[qid] = float(not qid_to_has_ans[qid]) + else: + new_scores[qid] = s + return new_scores + +def make_eval_dict(exact_scores, f1_scores, qid_list=None): + if not qid_list: + total = len(exact_scores) + return collections.OrderedDict([ + ('exact', 100.0 * sum(exact_scores.values()) / total), + ('f1', 100.0 * sum(f1_scores.values()) / total), + ('total', total), + ]) + else: + total = len(qid_list) + return collections.OrderedDict([ + ('exact', 100.0 * sum(exact_scores[k] for k in qid_list) / total), + ('f1', 100.0 * sum(f1_scores[k] for k in qid_list) / total), + ('total', total), + ]) + +def merge_eval(main_eval, new_eval, prefix): + for k in new_eval: + main_eval['%s_%s' % (prefix, k)] = new_eval[k] + +def plot_pr_curve(precisions, recalls, out_image, title): + plt.step(recalls, precisions, color='b', alpha=0.2, where='post') + plt.fill_between(recalls, precisions, step='post', alpha=0.2, color='b') + plt.xlabel('Recall') + plt.ylabel('Precision') + plt.xlim([0.0, 1.05]) + plt.ylim([0.0, 1.05]) + plt.title(title) + plt.savefig(out_image) + plt.clf() + +def make_precision_recall_eval(scores, na_probs, num_true_pos, qid_to_has_ans, + out_image=None, title=None): + qid_list = sorted(na_probs, key=lambda k: na_probs[k]) + true_pos = 0.0 + cur_p = 1.0 + cur_r = 0.0 + precisions = [1.0] + recalls = [0.0] + avg_prec = 0.0 + for i, qid in enumerate(qid_list): + if qid_to_has_ans[qid]: + true_pos += scores[qid] + cur_p = true_pos / float(i+1) + cur_r = true_pos / float(num_true_pos) + if i == len(qid_list) - 1 or na_probs[qid] != na_probs[qid_list[i+1]]: + # i.e., if we can put a threshold after this point + avg_prec += cur_p * (cur_r - recalls[-1]) + precisions.append(cur_p) + recalls.append(cur_r) + if out_image: + plot_pr_curve(precisions, recalls, out_image, title) + return {'ap': 100.0 * avg_prec} + +def run_precision_recall_analysis(main_eval, exact_raw, f1_raw, na_probs, + qid_to_has_ans, out_image_dir): + if out_image_dir and not os.path.exists(out_image_dir): + os.makedirs(out_image_dir) + num_true_pos = sum(1 for v in qid_to_has_ans.values() if v) + if num_true_pos == 0: + return + pr_exact = make_precision_recall_eval( + exact_raw, na_probs, num_true_pos, qid_to_has_ans, + out_image=os.path.join(out_image_dir, 'pr_exact.png'), + title='Precision-Recall curve for Exact Match score') + pr_f1 = make_precision_recall_eval( + f1_raw, na_probs, num_true_pos, qid_to_has_ans, + out_image=os.path.join(out_image_dir, 'pr_f1.png'), + title='Precision-Recall curve for F1 score') + oracle_scores = {k: float(v) for k, v in qid_to_has_ans.items()} + pr_oracle = make_precision_recall_eval( + oracle_scores, na_probs, num_true_pos, qid_to_has_ans, + out_image=os.path.join(out_image_dir, 'pr_oracle.png'), + title='Oracle Precision-Recall curve (binary task of HasAns vs. 
NoAns)') + merge_eval(main_eval, pr_exact, 'pr_exact') + merge_eval(main_eval, pr_f1, 'pr_f1') + merge_eval(main_eval, pr_oracle, 'pr_oracle') + +def histogram_na_prob(na_probs, qid_list, image_dir, name): + if not qid_list: + return + x = [na_probs[k] for k in qid_list] + weights = np.ones_like(x) / float(len(x)) + plt.hist(x, weights=weights, bins=20, range=(0.0, 1.0)) + plt.xlabel('Model probability of no-answer') + plt.ylabel('Proportion of dataset') + plt.title('Histogram of no-answer probability: %s' % name) + plt.savefig(os.path.join(image_dir, 'na_prob_hist_%s.png' % name)) + plt.clf() + +def find_best_thresh(preds, scores, na_probs, qid_to_has_ans): + num_no_ans = sum(1 for k in qid_to_has_ans if not qid_to_has_ans[k]) + cur_score = num_no_ans + best_score = cur_score + best_thresh = 0.0 + qid_list = sorted(na_probs, key=lambda k: na_probs[k]) + for i, qid in enumerate(qid_list): + if qid not in scores: continue + if qid_to_has_ans[qid]: + diff = scores[qid] + else: + if preds[qid]: + diff = -1 + else: + diff = 0 + cur_score += diff + if cur_score > best_score: + best_score = cur_score + best_thresh = na_probs[qid] + return 100.0 * best_score / len(scores), best_thresh + +def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, qid_to_has_ans): + best_exact, exact_thresh = find_best_thresh(preds, exact_raw, na_probs, qid_to_has_ans) + best_f1, f1_thresh = find_best_thresh(preds, f1_raw, na_probs, qid_to_has_ans) + main_eval['best_exact'] = best_exact + main_eval['best_exact_thresh'] = exact_thresh + main_eval['best_f1'] = best_f1 + main_eval['best_f1_thresh'] = f1_thresh + +def main(OPTS): + with open(OPTS.data_file) as f: + dataset_json = json.load(f) + dataset = dataset_json['data'] + with open(OPTS.pred_file) as f: + preds = json.load(f) + if OPTS.na_prob_file: + with open(OPTS.na_prob_file) as f: + na_probs = json.load(f) + else: + na_probs = {k: 0.0 for k in preds} + qid_to_has_ans = make_qid_to_has_ans(dataset) # maps qid to True/False + has_ans_qids = [k for k, v in qid_to_has_ans.items() if v] + no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v] + exact_raw, f1_raw = get_raw_scores(dataset, preds) + exact_thresh = apply_no_ans_threshold(exact_raw, na_probs, qid_to_has_ans, + OPTS.na_prob_thresh) + f1_thresh = apply_no_ans_threshold(f1_raw, na_probs, qid_to_has_ans, + OPTS.na_prob_thresh) + out_eval = make_eval_dict(exact_thresh, f1_thresh) + if has_ans_qids: + has_ans_eval = make_eval_dict(exact_thresh, f1_thresh, qid_list=has_ans_qids) + merge_eval(out_eval, has_ans_eval, 'HasAns') + if no_ans_qids: + no_ans_eval = make_eval_dict(exact_thresh, f1_thresh, qid_list=no_ans_qids) + merge_eval(out_eval, no_ans_eval, 'NoAns') + if OPTS.na_prob_file: + find_all_best_thresh(out_eval, preds, exact_raw, f1_raw, na_probs, qid_to_has_ans) + if OPTS.na_prob_file and OPTS.out_image_dir: + run_precision_recall_analysis(out_eval, exact_raw, f1_raw, na_probs, + qid_to_has_ans, OPTS.out_image_dir) + histogram_na_prob(na_probs, has_ans_qids, OPTS.out_image_dir, 'hasAns') + histogram_na_prob(na_probs, no_ans_qids, OPTS.out_image_dir, 'noAns') + if OPTS.out_file: + with open(OPTS.out_file, 'w') as f: + json.dump(out_eval, f) + else: + print(json.dumps(out_eval, indent=2)) + return out_eval + +if __name__ == '__main__': + OPTS = parse_args() + if OPTS.out_image_dir: + import matplotlib + matplotlib.use('Agg') + import matplotlib.pyplot as plt + main(OPTS) From 5fb00f2de29b4f27e43c2fa2844552cb78c1f5f2 Mon Sep 17 00:00:00 2001 From: fmikaelian 
<39884124+fmikaelian@users.noreply.github.com> Date: Fri, 12 Jul 2019 15:09:45 +0200 Subject: [PATCH 02/43] prepare for reverse-engineering and adaptation to HF release --- .../{ => hf_original_examples}/run_squad.py | 0 .../hf_original_examples/utils_squad.py | 743 ++++++++++++++++++ .../utils_squad_evaluate.py | 0 cdqa/reader/reader_sklearn.py | 0 cdqa/reader/utils_squad.py | 743 ------------------ 5 files changed, 743 insertions(+), 743 deletions(-) rename cdqa/reader/{ => hf_original_examples}/run_squad.py (100%) create mode 100644 cdqa/reader/hf_original_examples/utils_squad.py rename cdqa/reader/{ => hf_original_examples}/utils_squad_evaluate.py (100%) create mode 100644 cdqa/reader/reader_sklearn.py diff --git a/cdqa/reader/run_squad.py b/cdqa/reader/hf_original_examples/run_squad.py similarity index 100% rename from cdqa/reader/run_squad.py rename to cdqa/reader/hf_original_examples/run_squad.py diff --git a/cdqa/reader/hf_original_examples/utils_squad.py b/cdqa/reader/hf_original_examples/utils_squad.py new file mode 100644 index 00000000..305eeb7b --- /dev/null +++ b/cdqa/reader/hf_original_examples/utils_squad.py @@ -0,0 +1,743 @@ + +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Load SQuAD dataset. """ + +from __future__ import absolute_import, division, print_function + +import json +import logging +import math +import collections +from io import open + +from pytorch_transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize + +logger = logging.getLogger(__name__) + + +class SquadExample(object): + """ + A single training/test example for the Squad dataset. + For examples without an answer, the start and end position are -1. 
+ """ + + def __init__(self, + qas_id, + question_text, + doc_tokens, + orig_answer_text=None, + start_position=None, + end_position=None, + is_impossible=None): + self.qas_id = qas_id + self.question_text = question_text + self.doc_tokens = doc_tokens + self.orig_answer_text = orig_answer_text + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + def __str__(self): + return self.__repr__() + + def __repr__(self): + s = "" + s += "qas_id: %s" % (self.qas_id) + s += ", question_text: %s" % ( + self.question_text) + s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) + if self.start_position: + s += ", start_position: %d" % (self.start_position) + if self.end_position: + s += ", end_position: %d" % (self.end_position) + if self.is_impossible: + s += ", is_impossible: %r" % (self.is_impossible) + return s + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, + unique_id, + example_index, + doc_span_index, + tokens, + token_to_orig_map, + token_is_max_context, + input_ids, + input_mask, + segment_ids, + start_position=None, + end_position=None, + is_impossible=None): + self.unique_id = unique_id + self.example_index = example_index + self.doc_span_index = doc_span_index + self.tokens = tokens + self.token_to_orig_map = token_to_orig_map + self.token_is_max_context = token_is_max_context + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + +def read_squad_examples(input_file, is_training, version_2_with_negative): + """Read a SQuAD json file into a list of SquadExample.""" + with open(input_file, "r", encoding='utf-8') as reader: + input_data = json.load(reader)["data"] + + def is_whitespace(c): + if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: + return True + return False + + examples = [] + for entry in input_data: + for paragraph in entry["paragraphs"]: + paragraph_text = paragraph["context"] + doc_tokens = [] + char_to_word_offset = [] + prev_is_whitespace = True + for c in paragraph_text: + if is_whitespace(c): + prev_is_whitespace = True + else: + if prev_is_whitespace: + doc_tokens.append(c) + else: + doc_tokens[-1] += c + prev_is_whitespace = False + char_to_word_offset.append(len(doc_tokens) - 1) + + for qa in paragraph["qas"]: + qas_id = qa["id"] + question_text = qa["question"] + start_position = None + end_position = None + orig_answer_text = None + is_impossible = False + if is_training: + if version_2_with_negative: + is_impossible = qa["is_impossible"] + if (len(qa["answers"]) != 1) and (not is_impossible): + raise ValueError( + "For training, each question should have exactly 1 answer.") + if not is_impossible: + answer = qa["answers"][0] + orig_answer_text = answer["text"] + answer_offset = answer["answer_start"] + answer_length = len(orig_answer_text) + start_position = char_to_word_offset[answer_offset] + end_position = char_to_word_offset[answer_offset + answer_length - 1] + # Only add answers where the text can be exactly recovered from the + # document. If this CAN'T happen it's likely due to weird Unicode + # stuff so we will just skip the example. + # + # Note that this means for training mode, every example is NOT + # guaranteed to be preserved. 
+ actual_text = " ".join(doc_tokens[start_position:(end_position + 1)])
+ cleaned_answer_text = " ".join(
+ whitespace_tokenize(orig_answer_text))
+ if actual_text.find(cleaned_answer_text) == -1:
+ logger.warning("Could not find answer: '%s' vs. '%s'",
+ actual_text, cleaned_answer_text)
+ continue
+ else:
+ start_position = -1
+ end_position = -1
+ orig_answer_text = ""
+
+ example = SquadExample(
+ qas_id=qas_id,
+ question_text=question_text,
+ doc_tokens=doc_tokens,
+ orig_answer_text=orig_answer_text,
+ start_position=start_position,
+ end_position=end_position,
+ is_impossible=is_impossible)
+ examples.append(example)
+ return examples
+
+
+def convert_examples_to_features(examples, tokenizer, max_seq_length,
+ doc_stride, max_query_length, is_training):
+ """Loads a data file into a list of `InputBatch`s."""
+
+ unique_id = 1000000000
+
+ features = []
+ for (example_index, example) in enumerate(examples):
+ query_tokens = tokenizer.tokenize(example.question_text)
+
+ if len(query_tokens) > max_query_length:
+ query_tokens = query_tokens[0:max_query_length]
+
+ tok_to_orig_index = []
+ orig_to_tok_index = []
+ all_doc_tokens = []
+ for (i, token) in enumerate(example.doc_tokens):
+ orig_to_tok_index.append(len(all_doc_tokens))
+ sub_tokens = tokenizer.tokenize(token)
+ for sub_token in sub_tokens:
+ tok_to_orig_index.append(i)
+ all_doc_tokens.append(sub_token)
+
+ tok_start_position = None
+ tok_end_position = None
+ if is_training and example.is_impossible:
+ tok_start_position = -1
+ tok_end_position = -1
+ if is_training and not example.is_impossible:
+ tok_start_position = orig_to_tok_index[example.start_position]
+ if example.end_position < len(example.doc_tokens) - 1:
+ tok_end_position = orig_to_tok_index[example.end_position + 1] - 1
+ else:
+ tok_end_position = len(all_doc_tokens) - 1
+ (tok_start_position, tok_end_position) = _improve_answer_span(
+ all_doc_tokens, tok_start_position, tok_end_position, tokenizer,
+ example.orig_answer_text)
+
+ # The -3 accounts for [CLS], [SEP] and [SEP]
+ max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
+
+ # We can have documents that are longer than the maximum sequence length.
+ # To deal with this we do a sliding window approach, where we take chunks
+ # of up to our max length with a stride of `doc_stride`.
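Editor's note: the sliding-window comment above is implemented by the _DocSpan loop that follows. A standalone sketch of the same enumeration (token count, window size, and stride invented):

import collections

DocSpan = collections.namedtuple("DocSpan", ["start", "length"])

def enumerate_doc_spans(num_tokens, max_tokens_for_doc, doc_stride):
    doc_spans, start_offset = [], 0
    while start_offset < num_tokens:
        length = min(num_tokens - start_offset, max_tokens_for_doc)
        doc_spans.append(DocSpan(start=start_offset, length=length))
        if start_offset + length == num_tokens:
            break
        start_offset += min(length, doc_stride)
    return doc_spans

print(enumerate_doc_spans(10, 4, 2))
# [DocSpan(start=0, length=4), DocSpan(start=2, length=4),
#  DocSpan(start=4, length=4), DocSpan(start=6, length=4)]

A stride smaller than the window makes consecutive windows overlap, which is why _check_is_max_context later has to pick a single "best" window per token.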
+ _DocSpan = collections.namedtuple( # pylint: disable=invalid-name + "DocSpan", ["start", "length"]) + doc_spans = [] + start_offset = 0 + while start_offset < len(all_doc_tokens): + length = len(all_doc_tokens) - start_offset + if length > max_tokens_for_doc: + length = max_tokens_for_doc + doc_spans.append(_DocSpan(start=start_offset, length=length)) + if start_offset + length == len(all_doc_tokens): + break + start_offset += min(length, doc_stride) + + for (doc_span_index, doc_span) in enumerate(doc_spans): + tokens = [] + token_to_orig_map = {} + token_is_max_context = {} + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in query_tokens: + tokens.append(token) + segment_ids.append(0) + tokens.append("[SEP]") + segment_ids.append(0) + + for i in range(doc_span.length): + split_token_index = doc_span.start + i + token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] + + is_max_context = _check_is_max_context(doc_spans, doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + start_position = None + end_position = None + if is_training and not example.is_impossible: + # For training, if our document chunk does not contain an annotation + # we throw it out, since there is nothing to predict. 
+ doc_start = doc_span.start
+ doc_end = doc_span.start + doc_span.length - 1
+ out_of_span = False
+ if not (tok_start_position >= doc_start and
+ tok_end_position <= doc_end):
+ out_of_span = True
+ if out_of_span:
+ start_position = 0
+ end_position = 0
+ else:
+ doc_offset = len(query_tokens) + 2
+ start_position = tok_start_position - doc_start + doc_offset
+ end_position = tok_end_position - doc_start + doc_offset
+ if is_training and example.is_impossible:
+ start_position = 0
+ end_position = 0
+ if example_index < 20:
+ logger.info("*** Example ***")
+ logger.info("unique_id: %s" % (unique_id))
+ logger.info("example_index: %s" % (example_index))
+ logger.info("doc_span_index: %s" % (doc_span_index))
+ logger.info("tokens: %s" % " ".join(tokens))
+ logger.info("token_to_orig_map: %s" % " ".join([
+ "%d:%d" % (x, y) for (x, y) in token_to_orig_map.items()]))
+ logger.info("token_is_max_context: %s" % " ".join([
+ "%d:%s" % (x, y) for (x, y) in token_is_max_context.items()
+ ]))
+ logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ logger.info(
+ "input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ logger.info(
+ "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+ if is_training and example.is_impossible:
+ logger.info("impossible example")
+ if is_training and not example.is_impossible:
+ answer_text = " ".join(tokens[start_position:(end_position + 1)])
+ logger.info("start_position: %d" % (start_position))
+ logger.info("end_position: %d" % (end_position))
+ logger.info(
+ "answer: %s" % (answer_text))
+
+ features.append(
+ InputFeatures(
+ unique_id=unique_id,
+ example_index=example_index,
+ doc_span_index=doc_span_index,
+ tokens=tokens,
+ token_to_orig_map=token_to_orig_map,
+ token_is_max_context=token_is_max_context,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids,
+ start_position=start_position,
+ end_position=end_position,
+ is_impossible=example.is_impossible))
+ unique_id += 1
+
+ return features
+
+
+def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
+ orig_answer_text):
+ """Returns tokenized answer spans that better match the annotated answer."""
+
+ # The SQuAD annotations are character based. We first project them to
+ # whitespace-tokenized words. But then after WordPiece tokenization, we can
+ # often find a "better match". For example:
+ #
+ # Question: What year was John Smith born?
+ # Context: The leader was John Smith (1895-1943).
+ # Answer: 1895
+ #
+ # The original whitespace-tokenized answer will be "(1895-1943).". However
+ # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
+ # the exact answer, 1895.
+ #
+ # However, this is not always possible. Consider the following:
+ #
+ # Question: What country is the top exporter of electronics?
+ # Context: The Japanese electronics industry is the largest in the world.
+ # Answer: Japan
+ #
+ # In this case, the annotator chose "Japan" as a character sub-span of
+ # the word "Japanese". Since our WordPiece tokenizer does not split
+ # "Japanese", we just use "Japanese" as the annotation. This is fairly rare
+ # in SQuAD, but does happen.
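Editor's note: a bit further on, _check_is_max_context scores every window containing a token by min(left context, right context) + 0.01 * span length and keeps only the best one. A toy walk-through using the "bought" example from its comments (spans written as invented (start, length) pairs):

import collections

DocSpan = collections.namedtuple("DocSpan", ["start", "length"])
doc_spans = [DocSpan(start=0, length=5),   # 'the man went to the'
             DocSpan(start=3, length=5),   # 'to the store and bought'
             DocSpan(start=6, length=5)]   # 'and bought a gallon of'
position = 7                               # the token 'bought'

best = max(
    (min(position - s.start, s.start + s.length - 1 - position) + 0.01 * s.length, i)
    for i, s in enumerate(doc_spans)
    if s.start <= position <= s.start + s.length - 1)
assert best == (1.05, 2)  # the third span gives 'bought' the most context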
+ tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) + + for new_start in range(input_start, input_end + 1): + for new_end in range(input_end, new_start - 1, -1): + text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) + if text_span == tok_answer_text: + return (new_start, new_end) + + return (input_start, input_end) + + +def _check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token.""" + + # Because of the sliding window approach taken to scoring documents, a single + # token can appear in multiple documents. E.g. + # Doc: the man went to the store and bought a gallon of milk + # Span A: the man went to the + # Span B: to the store and bought + # Span C: and bought a gallon of + # ... + # + # Now the word 'bought' will have two scores from spans B and C. We only + # want to consider the score with "maximum context", which we define as + # the *minimum* of its left and right context (the *sum* of left and + # right context will always be the same, of course). + # + # In the example the maximum context for 'bought' would be span C since + # it has 1 left context and 3 right context, while span B has 4 left context + # and 0 right context. + best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, num_right_context) + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index + + +RawResult = collections.namedtuple("RawResult", + ["unique_id", "start_logits", "end_logits"]) + + +def write_predictions(all_examples, all_features, all_results, n_best_size, + max_answer_length, do_lower_case, output_prediction_file, + output_nbest_file, output_null_log_odds_file, verbose_logging, + version_2_with_negative, null_score_diff_threshold): + """Write final predictions to the json file and log-odds of null if needed.""" + logger.info("Writing predictions to: %s" % (output_prediction_file)) + logger.info("Writing nbest to: %s" % (output_nbest_file)) + + example_index_to_features = collections.defaultdict(list) + for feature in all_features: + example_index_to_features[feature.example_index].append(feature) + + unique_id_to_result = {} + for result in all_results: + unique_id_to_result[result.unique_id] = result + + _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name + "PrelimPrediction", + ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]) + + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + scores_diff_json = collections.OrderedDict() + + for (example_index, example) in enumerate(all_examples): + features = example_index_to_features[example_index] + + prelim_predictions = [] + # keep track of the minimum score of null start+end of position 0 + score_null = 1000000 # large and positive + min_null_feature_index = 0 # the paragraph slice with min null score + null_start_logit = 0 # the start logit at the slice with min null score + null_end_logit = 0 # the end logit at the slice with min null score + for (feature_index, feature) in enumerate(features): + result = unique_id_to_result[feature.unique_id] + start_indexes = _get_best_indexes(result.start_logits, 
n_best_size) + end_indexes = _get_best_indexes(result.end_logits, n_best_size) + # if we could have irrelevant answers, get the min score of irrelevant + if version_2_with_negative: + feature_null_score = result.start_logits[0] + result.end_logits[0] + if feature_null_score < score_null: + score_null = feature_null_score + min_null_feature_index = feature_index + null_start_logit = result.start_logits[0] + null_end_logit = result.end_logits[0] + for start_index in start_indexes: + for end_index in end_indexes: + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. + if start_index >= len(feature.tokens): + continue + if end_index >= len(feature.tokens): + continue + if start_index not in feature.token_to_orig_map: + continue + if end_index not in feature.token_to_orig_map: + continue + if not feature.token_is_max_context.get(start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > max_answer_length: + continue + prelim_predictions.append( + _PrelimPrediction( + feature_index=feature_index, + start_index=start_index, + end_index=end_index, + start_logit=result.start_logits[start_index], + end_logit=result.end_logits[end_index])) + if version_2_with_negative: + prelim_predictions.append( + _PrelimPrediction( + feature_index=min_null_feature_index, + start_index=0, + end_index=0, + start_logit=null_start_logit, + end_logit=null_end_logit)) + prelim_predictions = sorted( + prelim_predictions, + key=lambda x: (x.start_logit + x.end_logit), + reverse=True) + + _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name + "NbestPrediction", ["text", "start_logit", "end_logit"]) + + seen_predictions = {} + nbest = [] + for pred in prelim_predictions: + if len(nbest) >= n_best_size: + break + feature = features[pred.feature_index] + if pred.start_index > 0: # this is a non-null prediction + tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + orig_doc_start = feature.token_to_orig_map[pred.start_index] + orig_doc_end = feature.token_to_orig_map[pred.end_index] + orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + tok_text = " ".join(tok_tokens) + + # De-tokenize WordPieces that have been split off. + tok_text = tok_text.replace(" ##", "") + tok_text = tok_text.replace("##", "") + + # Clean whitespace + tok_text = tok_text.strip() + tok_text = " ".join(tok_text.split()) + orig_text = " ".join(orig_tokens) + + final_text = get_final_text(tok_text, orig_text, do_lower_case, verbose_logging) + if final_text in seen_predictions: + continue + + seen_predictions[final_text] = True + else: + final_text = "" + seen_predictions[final_text] = True + + nbest.append( + _NbestPrediction( + text=final_text, + start_logit=pred.start_logit, + end_logit=pred.end_logit)) + # if we didn't include the empty option in the n-best, include it + if version_2_with_negative: + if "" not in seen_predictions: + nbest.append( + _NbestPrediction( + text="", + start_logit=null_start_logit, + end_logit=null_end_logit)) + + # In very rare edge cases we could only have single null prediction. + # So we just create a nonce prediction in this case to avoid failure. + if len(nbest)==1: + nbest.insert(0, + _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + + # In very rare edge cases we could have no valid predictions. So we + # just create a nonce prediction in this case to avoid failure. 
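Editor's note: once the n-best list below is finalized, SQuAD 2.0 mode predicts the empty string iff the null score beats the best non-null span by more than null_score_diff_threshold. A numeric sketch with invented logits:

score_null = 1.7                               # start_logits[0] + end_logits[0]
best_start_logit, best_end_logit = 2.4, 1.9    # best non-null span
null_score_diff_threshold = 0.0

score_diff = score_null - best_start_logit - best_end_logit  # -2.6
prediction = "" if score_diff > null_score_diff_threshold else "denver broncos"
assert prediction == "denver broncos"          # the span answer wins here

Raising the threshold makes the model more reluctant to answer; the companion evaluation script's find_best_thresh can search for the value that maximizes EM or F1.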
+ if not nbest: + nbest.append( + _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + + assert len(nbest) >= 1 + + total_scores = [] + best_non_null_entry = None + for entry in nbest: + total_scores.append(entry.start_logit + entry.end_logit) + if not best_non_null_entry: + if entry.text: + best_non_null_entry = entry + + probs = _compute_softmax(total_scores) + + nbest_json = [] + for (i, entry) in enumerate(nbest): + output = collections.OrderedDict() + output["text"] = entry.text + output["probability"] = probs[i] + output["start_logit"] = entry.start_logit + output["end_logit"] = entry.end_logit + nbest_json.append(output) + + assert len(nbest_json) >= 1 + + if not version_2_with_negative: + all_predictions[example.qas_id] = nbest_json[0]["text"] + else: + # predict "" iff the null score - the score of best non-null > threshold + score_diff = score_null - best_non_null_entry.start_logit - ( + best_non_null_entry.end_logit) + scores_diff_json[example.qas_id] = score_diff + if score_diff > null_score_diff_threshold: + all_predictions[example.qas_id] = "" + else: + all_predictions[example.qas_id] = best_non_null_entry.text + all_nbest_json[example.qas_id] = nbest_json + + with open(output_prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") + + with open(output_nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + "\n") + + if version_2_with_negative: + with open(output_null_log_odds_file, "w") as writer: + writer.write(json.dumps(scores_diff_json, indent=4) + "\n") + + return all_predictions + + +def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): + """Project the tokenized prediction back to the original text.""" + + # When we created the data, we kept track of the alignment between original + # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So + # now `orig_text` contains the span of our original text corresponding to the + # span that we predicted. + # + # However, `orig_text` may contain extra characters that we don't want in + # our prediction. + # + # For example, let's say: + # pred_text = steve smith + # orig_text = Steve Smith's + # + # We don't want to return `orig_text` because it contains the extra "'s". + # + # We don't want to return `pred_text` because it's already been normalized + # (the SQuAD eval script also does punctuation stripping/lower casing but + # our tokenizer does additional normalization like stripping accent + # characters). + # + # What we really want to return is "Steve Smith". + # + # Therefore, we have to apply a semi-complicated alignment heuristic between + # `pred_text` and `orig_text` to get a character-to-character alignment. This + # can fail in certain cases in which case we just return `orig_text`. + + def _strip_spaces(text): + ns_chars = [] + ns_to_s_map = collections.OrderedDict() + for (i, c) in enumerate(text): + if c == " ": + continue + ns_to_s_map[len(ns_chars)] = i + ns_chars.append(c) + ns_text = "".join(ns_chars) + return (ns_text, ns_to_s_map) + + # We first tokenize `orig_text`, strip whitespace from the result + # and `pred_text`, and check if they are the same length. If they are + # NOT the same length, the heuristic has failed. If they are the same + # length, we assume the characters are one-to-one aligned. 
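Note on the SQuAD 2.0 branch above: it predicts the empty string exactly when the null score beats the best non-null span score by more than null_score_diff_threshold. A minimal sketch of that decision rule, with invented logit values:

    def predict_with_null(score_null, best_start_logit, best_end_logit,
                          best_text, null_score_diff_threshold=0.0):
        # predict "" iff null score - best non-null span score > threshold,
        # mirroring the score_diff logic above
        score_diff = score_null - best_start_logit - best_end_logit
        return "" if score_diff > null_score_diff_threshold else best_text

    print(predict_with_null(1.0, 2.0, 2.5, "Steve Smith"))   # span wins -> "Steve Smith"
    print(predict_with_null(6.0, 2.0, 2.5, "Steve Smith"))   # null wins -> ""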
+ tokenizer = BasicTokenizer(do_lower_case=do_lower_case) + + tok_text = " ".join(tokenizer.tokenize(orig_text)) + + start_position = tok_text.find(pred_text) + if start_position == -1: + if verbose_logging: + logger.info( + "Unable to find text: '%s' in '%s'" % (pred_text, orig_text)) + return orig_text + end_position = start_position + len(pred_text) - 1 + + (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) + (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) + + if len(orig_ns_text) != len(tok_ns_text): + if verbose_logging: + logger.info("Length not equal after stripping spaces: '%s' vs '%s'", + orig_ns_text, tok_ns_text) + return orig_text + + # We then project the characters in `pred_text` back to `orig_text` using + # the character-to-character alignment. + tok_s_to_ns_map = {} + for (i, tok_index) in tok_ns_to_s_map.items(): + tok_s_to_ns_map[tok_index] = i + + orig_start_position = None + if start_position in tok_s_to_ns_map: + ns_start_position = tok_s_to_ns_map[start_position] + if ns_start_position in orig_ns_to_s_map: + orig_start_position = orig_ns_to_s_map[ns_start_position] + + if orig_start_position is None: + if verbose_logging: + logger.info("Couldn't map start position") + return orig_text + + orig_end_position = None + if end_position in tok_s_to_ns_map: + ns_end_position = tok_s_to_ns_map[end_position] + if ns_end_position in orig_ns_to_s_map: + orig_end_position = orig_ns_to_s_map[ns_end_position] + + if orig_end_position is None: + if verbose_logging: + logger.info("Couldn't map end position") + return orig_text + + output_text = orig_text[orig_start_position:(orig_end_position + 1)] + return output_text + + +def _get_best_indexes(logits, n_best_size): + """Get the n-best logits from a list.""" + index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) + + best_indexes = [] + for i in range(len(index_and_score)): + if i >= n_best_size: + break + best_indexes.append(index_and_score[i][0]) + return best_indexes + + +def _compute_softmax(scores): + """Compute softmax probability over raw logits.""" + if not scores: + return [] + + max_score = None + for score in scores: + if max_score is None or score > max_score: + max_score = score + + exp_scores = [] + total_sum = 0.0 + for score in scores: + x = math.exp(score - max_score) + exp_scores.append(x) + total_sum += x + + probs = [] + for score in exp_scores: + probs.append(score / total_sum) + return probs diff --git a/cdqa/reader/utils_squad_evaluate.py b/cdqa/reader/hf_original_examples/utils_squad_evaluate.py similarity index 100% rename from cdqa/reader/utils_squad_evaluate.py rename to cdqa/reader/hf_original_examples/utils_squad_evaluate.py diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py new file mode 100644 index 00000000..e69de29b diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py index 305eeb7b..e69de29b 100644 --- a/cdqa/reader/utils_squad.py +++ b/cdqa/reader/utils_squad.py @@ -1,743 +0,0 @@ - -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Load SQuAD dataset. """ - -from __future__ import absolute_import, division, print_function - -import json -import logging -import math -import collections -from io import open - -from pytorch_transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize - -logger = logging.getLogger(__name__) - - -class SquadExample(object): - """ - A single training/test example for the Squad dataset. - For examples without an answer, the start and end position are -1. - """ - - def __init__(self, - qas_id, - question_text, - doc_tokens, - orig_answer_text=None, - start_position=None, - end_position=None, - is_impossible=None): - self.qas_id = qas_id - self.question_text = question_text - self.doc_tokens = doc_tokens - self.orig_answer_text = orig_answer_text - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - def __str__(self): - return self.__repr__() - - def __repr__(self): - s = "" - s += "qas_id: %s" % (self.qas_id) - s += ", question_text: %s" % ( - self.question_text) - s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) - if self.start_position: - s += ", start_position: %d" % (self.start_position) - if self.end_position: - s += ", end_position: %d" % (self.end_position) - if self.is_impossible: - s += ", is_impossible: %r" % (self.is_impossible) - return s - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, - unique_id, - example_index, - doc_span_index, - tokens, - token_to_orig_map, - token_is_max_context, - input_ids, - input_mask, - segment_ids, - start_position=None, - end_position=None, - is_impossible=None): - self.unique_id = unique_id - self.example_index = example_index - self.doc_span_index = doc_span_index - self.tokens = tokens - self.token_to_orig_map = token_to_orig_map - self.token_is_max_context = token_is_max_context - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - -def read_squad_examples(input_file, is_training, version_2_with_negative): - """Read a SQuAD json file into a list of SquadExample.""" - with open(input_file, "r", encoding='utf-8') as reader: - input_data = json.load(reader)["data"] - - def is_whitespace(c): - if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: - return True - return False - - examples = [] - for entry in input_data: - for paragraph in entry["paragraphs"]: - paragraph_text = paragraph["context"] - doc_tokens = [] - char_to_word_offset = [] - prev_is_whitespace = True - for c in paragraph_text: - if is_whitespace(c): - prev_is_whitespace = True - else: - if prev_is_whitespace: - doc_tokens.append(c) - else: - doc_tokens[-1] += c - prev_is_whitespace = False - char_to_word_offset.append(len(doc_tokens) - 1) - - for qa in paragraph["qas"]: - qas_id = qa["id"] - question_text = qa["question"] - start_position = None - end_position = None - orig_answer_text = None - is_impossible = False - if is_training: - if version_2_with_negative: - 
is_impossible = qa["is_impossible"] - if (len(qa["answers"]) != 1) and (not is_impossible): - raise ValueError( - "For training, each question should have exactly 1 answer.") - if not is_impossible: - answer = qa["answers"][0] - orig_answer_text = answer["text"] - answer_offset = answer["answer_start"] - answer_length = len(orig_answer_text) - start_position = char_to_word_offset[answer_offset] - end_position = char_to_word_offset[answer_offset + answer_length - 1] - # Only add answers where the text can be exactly recovered from the - # document. If this CAN'T happen it's likely due to weird Unicode - # stuff so we will just skip the example. - # - # Note that this means for training mode, every example is NOT - # guaranteed to be preserved. - actual_text = " ".join(doc_tokens[start_position:(end_position + 1)]) - cleaned_answer_text = " ".join( - whitespace_tokenize(orig_answer_text)) - if actual_text.find(cleaned_answer_text) == -1: - logger.warning("Could not find answer: '%s' vs. '%s'", - actual_text, cleaned_answer_text) - continue - else: - start_position = -1 - end_position = -1 - orig_answer_text = "" - - example = SquadExample( - qas_id=qas_id, - question_text=question_text, - doc_tokens=doc_tokens, - orig_answer_text=orig_answer_text, - start_position=start_position, - end_position=end_position, - is_impossible=is_impossible) - examples.append(example) - return examples - - -def convert_examples_to_features(examples, tokenizer, max_seq_length, - doc_stride, max_query_length, is_training): - """Loads a data file into a list of `InputBatch`s.""" - - unique_id = 1000000000 - - features = [] - for (example_index, example) in enumerate(examples): - query_tokens = tokenizer.tokenize(example.question_text) - - if len(query_tokens) > max_query_length: - query_tokens = query_tokens[0:max_query_length] - - tok_to_orig_index = [] - orig_to_tok_index = [] - all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): - orig_to_tok_index.append(len(all_doc_tokens)) - sub_tokens = tokenizer.tokenize(token) - for sub_token in sub_tokens: - tok_to_orig_index.append(i) - all_doc_tokens.append(sub_token) - - tok_start_position = None - tok_end_position = None - if is_training and example.is_impossible: - tok_start_position = -1 - tok_end_position = -1 - if is_training and not example.is_impossible: - tok_start_position = orig_to_tok_index[example.start_position] - if example.end_position < len(example.doc_tokens) - 1: - tok_end_position = orig_to_tok_index[example.end_position + 1] - 1 - else: - tok_end_position = len(all_doc_tokens) - 1 - (tok_start_position, tok_end_position) = _improve_answer_span( - all_doc_tokens, tok_start_position, tok_end_position, tokenizer, - example.orig_answer_text) - - # The -3 accounts for [CLS], [SEP] and [SEP] - max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 - - # We can have documents that are longer than the maximum sequence length. - # To deal with this we do a sliding window approach, where we take chunks - # of the up to our max length with a stride of `doc_stride`. 
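Note on the read_squad_examples logic being removed here (it is re-added unchanged by a later patch): it converts SQuAD's character-level answer_start into word positions via a char-to-word offset map. A self-contained sketch of that mapping, using a made-up paragraph:

    def whitespace_split_with_offsets(paragraph_text):
        # mirrors the doc_tokens / char_to_word_offset construction above
        doc_tokens, char_to_word_offset = [], []
        prev_is_whitespace = True
        for c in paragraph_text:
            if c in " \t\r\n" or ord(c) == 0x202F:
                prev_is_whitespace = True
            else:
                if prev_is_whitespace:
                    doc_tokens.append(c)
                else:
                    doc_tokens[-1] += c
                prev_is_whitespace = False
            char_to_word_offset.append(len(doc_tokens) - 1)
        return doc_tokens, char_to_word_offset

    tokens, offsets = whitespace_split_with_offsets("The leader was John Smith.")
    answer_start, answer_text = 15, "John Smith."
    start_word = offsets[answer_start]
    end_word = offsets[answer_start + len(answer_text) - 1]
    print(tokens[start_word:end_word + 1])   # -> ['John', 'Smith.']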
- _DocSpan = collections.namedtuple( # pylint: disable=invalid-name - "DocSpan", ["start", "length"]) - doc_spans = [] - start_offset = 0 - while start_offset < len(all_doc_tokens): - length = len(all_doc_tokens) - start_offset - if length > max_tokens_for_doc: - length = max_tokens_for_doc - doc_spans.append(_DocSpan(start=start_offset, length=length)) - if start_offset + length == len(all_doc_tokens): - break - start_offset += min(length, doc_stride) - - for (doc_span_index, doc_span) in enumerate(doc_spans): - tokens = [] - token_to_orig_map = {} - token_is_max_context = {} - segment_ids = [] - tokens.append("[CLS]") - segment_ids.append(0) - for token in query_tokens: - tokens.append(token) - segment_ids.append(0) - tokens.append("[SEP]") - segment_ids.append(0) - - for i in range(doc_span.length): - split_token_index = doc_span.start + i - token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] - - is_max_context = _check_is_max_context(doc_spans, doc_span_index, - split_token_index) - token_is_max_context[len(tokens)] = is_max_context - tokens.append(all_doc_tokens[split_token_index]) - segment_ids.append(1) - tokens.append("[SEP]") - segment_ids.append(1) - - input_ids = tokenizer.convert_tokens_to_ids(tokens) - - # The mask has 1 for real tokens and 0 for padding tokens. Only real - # tokens are attended to. - input_mask = [1] * len(input_ids) - - # Zero-pad up to the sequence length. - while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - start_position = None - end_position = None - if is_training and not example.is_impossible: - # For training, if our document chunk does not contain an annotation - # we throw it out, since there is nothing to predict. 
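Note on the comment above: it is implemented by clamping. If the gold answer is not fully inside the current document span, the chunk is labeled with start = end = 0 (the [CLS] slot); otherwise the token positions are shifted past [CLS], the query, and the first [SEP]. A sketch of that labeling rule with hypothetical positions:

    def label_span(tok_start_position, tok_end_position, doc_start, doc_length,
                   num_query_tokens):
        # mirrors the out_of_span / doc_offset logic that follows the comment above
        doc_end = doc_start + doc_length - 1
        if not (doc_start <= tok_start_position and tok_end_position <= doc_end):
            return 0, 0                      # annotation not in this chunk: point at [CLS]
        doc_offset = num_query_tokens + 2    # [CLS] + query + [SEP] come first
        return (tok_start_position - doc_start + doc_offset,
                tok_end_position - doc_start + doc_offset)

    print(label_span(130, 132, doc_start=128, doc_length=100, num_query_tokens=10))  # (14, 16)
    print(label_span(30, 32, doc_start=128, doc_length=100, num_query_tokens=10))    # (0, 0)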
- doc_start = doc_span.start - doc_end = doc_span.start + doc_span.length - 1 - out_of_span = False - if not (tok_start_position >= doc_start and - tok_end_position <= doc_end): - out_of_span = True - if out_of_span: - start_position = 0 - end_position = 0 - else: - doc_offset = len(query_tokens) + 2 - start_position = tok_start_position - doc_start + doc_offset - end_position = tok_end_position - doc_start + doc_offset - if is_training and example.is_impossible: - start_position = 0 - end_position = 0 - if example_index < 20: - logger.info("*** Example ***") - logger.info("unique_id: %s" % (unique_id)) - logger.info("example_index: %s" % (example_index)) - logger.info("doc_span_index: %s" % (doc_span_index)) - logger.info("tokens: %s" % " ".join(tokens)) - logger.info("token_to_orig_map: %s" % " ".join([ - "%d:%d" % (x, y) for (x, y) in token_to_orig_map.items()])) - logger.info("token_is_max_context: %s" % " ".join([ - "%d:%s" % (x, y) for (x, y) in token_is_max_context.items() - ])) - logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids])) - logger.info( - "input_mask: %s" % " ".join([str(x) for x in input_mask])) - logger.info( - "segment_ids: %s" % " ".join([str(x) for x in segment_ids])) - if is_training and example.is_impossible: - logger.info("impossible example") - if is_training and not example.is_impossible: - answer_text = " ".join(tokens[start_position:(end_position + 1)]) - logger.info("start_position: %d" % (start_position)) - logger.info("end_position: %d" % (end_position)) - logger.info( - "answer: %s" % (answer_text)) - - features.append( - InputFeatures( - unique_id=unique_id, - example_index=example_index, - doc_span_index=doc_span_index, - tokens=tokens, - token_to_orig_map=token_to_orig_map, - token_is_max_context=token_is_max_context, - input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - start_position=start_position, - end_position=end_position, - is_impossible=example.is_impossible)) - unique_id += 1 - - return features - - -def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, - orig_answer_text): - """Returns tokenized answer spans that better match the annotated answer.""" - - # The SQuAD annotations are character based. We first project them to - # whitespace-tokenized words. But then after WordPiece tokenization, we can - # often find a "better match". For example: - # - # Question: What year was John Smith born? - # Context: The leader was John Smith (1895-1943). - # Answer: 1895 - # - # The original whitespace-tokenized answer will be "(1895-1943).". However - # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match - # the exact answer, 1895. - # - # However, this is not always possible. Consider the following: - # - # Question: What country is the top exporter of electornics? - # Context: The Japanese electronics industry is the lagest in the world. - # Answer: Japan - # - # In this case, the annotator chose "Japan" as a character sub-span of - # the word "Japanese". Since our WordPiece tokenizer does not split - # "Japanese", we just use "Japanese" as the annotation. This is fairly rare - # in SQuAD, but does happen. 
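Note: the "(1895-1943)." case described above is easy to reproduce. The ToyTokenizer below is a hypothetical stand-in that merely splits off punctuation the way a subword tokenizer would; it is not the real BERT WordPiece tokenizer:

    import re

    class ToyTokenizer(object):
        # hypothetical stand-in for a WordPiece tokenizer: splits off punctuation
        def tokenize(self, text):
            return re.findall(r"\w+|[^\w\s]", text)

    def improve_answer_span(doc_tokens, input_start, input_end, tokenizer, orig_answer_text):
        # same search as _improve_answer_span above
        tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text))
        for new_start in range(input_start, input_end + 1):
            for new_end in range(input_end, new_start - 1, -1):
                if " ".join(doc_tokens[new_start:(new_end + 1)]) == tok_answer_text:
                    return new_start, new_end
        return input_start, input_end

    # "The leader was John Smith (1895-1943)." after subword-style tokenization:
    doc_tokens = ["The", "leader", "was", "John", "Smith", "(", "1895", "-", "1943", ")", "."]
    # the whitespace-level annotation covered the whole "(1895-1943)." token, i.e. 5..10
    print(improve_answer_span(doc_tokens, 5, 10, ToyTokenizer(), "1895"))  # -> (6, 6)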
- tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) - - for new_start in range(input_start, input_end + 1): - for new_end in range(input_end, new_start - 1, -1): - text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) - if text_span == tok_answer_text: - return (new_start, new_end) - - return (input_start, input_end) - - -def _check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - - # Because of the sliding window approach taken to scoring documents, a single - # token can appear in multiple documents. E.g. - # Doc: the man went to the store and bought a gallon of milk - # Span A: the man went to the - # Span B: to the store and bought - # Span C: and bought a gallon of - # ... - # - # Now the word 'bought' will have two scores from spans B and C. We only - # want to consider the score with "maximum context", which we define as - # the *minimum* of its left and right context (the *sum* of left and - # right context will always be the same, of course). - # - # In the example the maximum context for 'bought' would be span C since - # it has 1 left context and 3 right context, while span B has 4 left context - # and 0 right context. - best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index - - -RawResult = collections.namedtuple("RawResult", - ["unique_id", "start_logits", "end_logits"]) - - -def write_predictions(all_examples, all_features, all_results, n_best_size, - max_answer_length, do_lower_case, output_prediction_file, - output_nbest_file, output_null_log_odds_file, verbose_logging, - version_2_with_negative, null_score_diff_threshold): - """Write final predictions to the json file and log-odds of null if needed.""" - logger.info("Writing predictions to: %s" % (output_prediction_file)) - logger.info("Writing nbest to: %s" % (output_nbest_file)) - - example_index_to_features = collections.defaultdict(list) - for feature in all_features: - example_index_to_features[feature.example_index].append(feature) - - unique_id_to_result = {} - for result in all_results: - unique_id_to_result[result.unique_id] = result - - _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name - "PrelimPrediction", - ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]) - - all_predictions = collections.OrderedDict() - all_nbest_json = collections.OrderedDict() - scores_diff_json = collections.OrderedDict() - - for (example_index, example) in enumerate(all_examples): - features = example_index_to_features[example_index] - - prelim_predictions = [] - # keep track of the minimum score of null start+end of position 0 - score_null = 1000000 # large and positive - min_null_feature_index = 0 # the paragraph slice with min null score - null_start_logit = 0 # the start logit at the slice with min null score - null_end_logit = 0 # the end logit at the slice with min null score - for (feature_index, feature) in enumerate(features): - result = unique_id_to_result[feature.unique_id] - start_indexes = _get_best_indexes(result.start_logits, 
n_best_size) - end_indexes = _get_best_indexes(result.end_logits, n_best_size) - # if we could have irrelevant answers, get the min score of irrelevant - if version_2_with_negative: - feature_null_score = result.start_logits[0] + result.end_logits[0] - if feature_null_score < score_null: - score_null = feature_null_score - min_null_feature_index = feature_index - null_start_logit = result.start_logits[0] - null_end_logit = result.end_logits[0] - for start_index in start_indexes: - for end_index in end_indexes: - # We could hypothetically create invalid predictions, e.g., predict - # that the start of the span is in the question. We throw out all - # invalid predictions. - if start_index >= len(feature.tokens): - continue - if end_index >= len(feature.tokens): - continue - if start_index not in feature.token_to_orig_map: - continue - if end_index not in feature.token_to_orig_map: - continue - if not feature.token_is_max_context.get(start_index, False): - continue - if end_index < start_index: - continue - length = end_index - start_index + 1 - if length > max_answer_length: - continue - prelim_predictions.append( - _PrelimPrediction( - feature_index=feature_index, - start_index=start_index, - end_index=end_index, - start_logit=result.start_logits[start_index], - end_logit=result.end_logits[end_index])) - if version_2_with_negative: - prelim_predictions.append( - _PrelimPrediction( - feature_index=min_null_feature_index, - start_index=0, - end_index=0, - start_logit=null_start_logit, - end_logit=null_end_logit)) - prelim_predictions = sorted( - prelim_predictions, - key=lambda x: (x.start_logit + x.end_logit), - reverse=True) - - _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name - "NbestPrediction", ["text", "start_logit", "end_logit"]) - - seen_predictions = {} - nbest = [] - for pred in prelim_predictions: - if len(nbest) >= n_best_size: - break - feature = features[pred.feature_index] - if pred.start_index > 0: # this is a non-null prediction - tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] - orig_doc_start = feature.token_to_orig_map[pred.start_index] - orig_doc_end = feature.token_to_orig_map[pred.end_index] - orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] - tok_text = " ".join(tok_tokens) - - # De-tokenize WordPieces that have been split off. - tok_text = tok_text.replace(" ##", "") - tok_text = tok_text.replace("##", "") - - # Clean whitespace - tok_text = tok_text.strip() - tok_text = " ".join(tok_text.split()) - orig_text = " ".join(orig_tokens) - - final_text = get_final_text(tok_text, orig_text, do_lower_case, verbose_logging) - if final_text in seen_predictions: - continue - - seen_predictions[final_text] = True - else: - final_text = "" - seen_predictions[final_text] = True - - nbest.append( - _NbestPrediction( - text=final_text, - start_logit=pred.start_logit, - end_logit=pred.end_logit)) - # if we didn't include the empty option in the n-best, include it - if version_2_with_negative: - if "" not in seen_predictions: - nbest.append( - _NbestPrediction( - text="", - start_logit=null_start_logit, - end_logit=null_end_logit)) - - # In very rare edge cases we could only have single null prediction. - # So we just create a nonce prediction in this case to avoid failure. - if len(nbest)==1: - nbest.insert(0, - _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) - - # In very rare edge cases we could have no valid predictions. So we - # just create a nonce prediction in this case to avoid failure. 
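Note on the WordPiece cleanup above (the two replace calls): it undoes subword splitting before the span text is re-aligned with the original passage. A tiny sketch, with an invented token list:

    def detokenize_wordpieces(tok_tokens):
        # same " ##" joining and whitespace cleanup as in write_predictions above
        tok_text = " ".join(tok_tokens)
        tok_text = tok_text.replace(" ##", "").replace("##", "")
        return " ".join(tok_text.strip().split())

    print(detokenize_wordpieces(["john", "##son", "was", "un", "##believ", "##able"]))
    # -> "johnson was unbelievable"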
- if not nbest: - nbest.append( - _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) - - assert len(nbest) >= 1 - - total_scores = [] - best_non_null_entry = None - for entry in nbest: - total_scores.append(entry.start_logit + entry.end_logit) - if not best_non_null_entry: - if entry.text: - best_non_null_entry = entry - - probs = _compute_softmax(total_scores) - - nbest_json = [] - for (i, entry) in enumerate(nbest): - output = collections.OrderedDict() - output["text"] = entry.text - output["probability"] = probs[i] - output["start_logit"] = entry.start_logit - output["end_logit"] = entry.end_logit - nbest_json.append(output) - - assert len(nbest_json) >= 1 - - if not version_2_with_negative: - all_predictions[example.qas_id] = nbest_json[0]["text"] - else: - # predict "" iff the null score - the score of best non-null > threshold - score_diff = score_null - best_non_null_entry.start_logit - ( - best_non_null_entry.end_logit) - scores_diff_json[example.qas_id] = score_diff - if score_diff > null_score_diff_threshold: - all_predictions[example.qas_id] = "" - else: - all_predictions[example.qas_id] = best_non_null_entry.text - all_nbest_json[example.qas_id] = nbest_json - - with open(output_prediction_file, "w") as writer: - writer.write(json.dumps(all_predictions, indent=4) + "\n") - - with open(output_nbest_file, "w") as writer: - writer.write(json.dumps(all_nbest_json, indent=4) + "\n") - - if version_2_with_negative: - with open(output_null_log_odds_file, "w") as writer: - writer.write(json.dumps(scores_diff_json, indent=4) + "\n") - - return all_predictions - - -def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): - """Project the tokenized prediction back to the original text.""" - - # When we created the data, we kept track of the alignment between original - # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So - # now `orig_text` contains the span of our original text corresponding to the - # span that we predicted. - # - # However, `orig_text` may contain extra characters that we don't want in - # our prediction. - # - # For example, let's say: - # pred_text = steve smith - # orig_text = Steve Smith's - # - # We don't want to return `orig_text` because it contains the extra "'s". - # - # We don't want to return `pred_text` because it's already been normalized - # (the SQuAD eval script also does punctuation stripping/lower casing but - # our tokenizer does additional normalization like stripping accent - # characters). - # - # What we really want to return is "Steve Smith". - # - # Therefore, we have to apply a semi-complicated alignment heuristic between - # `pred_text` and `orig_text` to get a character-to-character alignment. This - # can fail in certain cases in which case we just return `orig_text`. - - def _strip_spaces(text): - ns_chars = [] - ns_to_s_map = collections.OrderedDict() - for (i, c) in enumerate(text): - if c == " ": - continue - ns_to_s_map[len(ns_chars)] = i - ns_chars.append(c) - ns_text = "".join(ns_chars) - return (ns_text, ns_to_s_map) - - # We first tokenize `orig_text`, strip whitespace from the result - # and `pred_text`, and check if they are the same length. If they are - # NOT the same length, the heuristic has failed. If they are the same - # length, we assume the characters are one-to-one aligned. 
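Note: the heuristic just described reduces to aligning the two strings character by character once spaces are removed. A sketch on the "Steve Smith" example from the comment, with simple lower-casing as a stand-in for BasicTokenizer:

    import collections

    def strip_spaces(text):
        # same helper as _strip_spaces above: non-space text plus a map back
        ns_chars = []
        ns_to_s_map = collections.OrderedDict()
        for i, c in enumerate(text):
            if c == " ":
                continue
            ns_to_s_map[len(ns_chars)] = i
            ns_chars.append(c)
        return "".join(ns_chars), ns_to_s_map

    orig_text = "Steve Smith's"
    pred_text = "steve smith"        # already normalized by the tokenizer
    tok_text = orig_text.lower()     # stand-in for BasicTokenizer output

    start = tok_text.find(pred_text)                 # 0
    end = start + len(pred_text) - 1                 # 10
    orig_ns, orig_map = strip_spaces(orig_text)      # "SteveSmith's"
    tok_ns, tok_map = strip_spaces(tok_text)         # "stevesmith's"
    # project tokenized char positions back to original char positions
    tok_s_to_ns = {s: ns for ns, s in tok_map.items()}
    orig_start = orig_map[tok_s_to_ns[start]]
    orig_end = orig_map[tok_s_to_ns[end]]
    print(orig_text[orig_start:orig_end + 1])        # -> "Steve Smith"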
- tokenizer = BasicTokenizer(do_lower_case=do_lower_case) - - tok_text = " ".join(tokenizer.tokenize(orig_text)) - - start_position = tok_text.find(pred_text) - if start_position == -1: - if verbose_logging: - logger.info( - "Unable to find text: '%s' in '%s'" % (pred_text, orig_text)) - return orig_text - end_position = start_position + len(pred_text) - 1 - - (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) - (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) - - if len(orig_ns_text) != len(tok_ns_text): - if verbose_logging: - logger.info("Length not equal after stripping spaces: '%s' vs '%s'", - orig_ns_text, tok_ns_text) - return orig_text - - # We then project the characters in `pred_text` back to `orig_text` using - # the character-to-character alignment. - tok_s_to_ns_map = {} - for (i, tok_index) in tok_ns_to_s_map.items(): - tok_s_to_ns_map[tok_index] = i - - orig_start_position = None - if start_position in tok_s_to_ns_map: - ns_start_position = tok_s_to_ns_map[start_position] - if ns_start_position in orig_ns_to_s_map: - orig_start_position = orig_ns_to_s_map[ns_start_position] - - if orig_start_position is None: - if verbose_logging: - logger.info("Couldn't map start position") - return orig_text - - orig_end_position = None - if end_position in tok_s_to_ns_map: - ns_end_position = tok_s_to_ns_map[end_position] - if ns_end_position in orig_ns_to_s_map: - orig_end_position = orig_ns_to_s_map[ns_end_position] - - if orig_end_position is None: - if verbose_logging: - logger.info("Couldn't map end position") - return orig_text - - output_text = orig_text[orig_start_position:(orig_end_position + 1)] - return output_text - - -def _get_best_indexes(logits, n_best_size): - """Get the n-best logits from a list.""" - index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) - - best_indexes = [] - for i in range(len(index_and_score)): - if i >= n_best_size: - break - best_indexes.append(index_and_score[i][0]) - return best_indexes - - -def _compute_softmax(scores): - """Compute softmax probability over raw logits.""" - if not scores: - return [] - - max_score = None - for score in scores: - if max_score is None or score > max_score: - max_score = score - - exp_scores = [] - total_sum = 0.0 - for score in scores: - x = math.exp(score - max_score) - exp_scores.append(x) - total_sum += x - - probs = [] - for score in exp_scores: - probs.append(score / total_sum) - return probs From 9ccea6b49bded28a798f1d069f22b29e6435ecc5 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Sun, 14 Jul 2019 15:13:27 +0200 Subject: [PATCH 03/43] add utils_squad from HF original --- cdqa/reader/utils_squad.py | 743 +++++++++++++++++++++++++++++++++++++ 1 file changed, 743 insertions(+) diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py index e69de29b..305eeb7b 100644 --- a/cdqa/reader/utils_squad.py +++ b/cdqa/reader/utils_squad.py @@ -0,0 +1,743 @@ + +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Load SQuAD dataset. """ + +from __future__ import absolute_import, division, print_function + +import json +import logging +import math +import collections +from io import open + +from pytorch_transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize + +logger = logging.getLogger(__name__) + + +class SquadExample(object): + """ + A single training/test example for the Squad dataset. + For examples without an answer, the start and end position are -1. + """ + + def __init__(self, + qas_id, + question_text, + doc_tokens, + orig_answer_text=None, + start_position=None, + end_position=None, + is_impossible=None): + self.qas_id = qas_id + self.question_text = question_text + self.doc_tokens = doc_tokens + self.orig_answer_text = orig_answer_text + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + def __str__(self): + return self.__repr__() + + def __repr__(self): + s = "" + s += "qas_id: %s" % (self.qas_id) + s += ", question_text: %s" % ( + self.question_text) + s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) + if self.start_position: + s += ", start_position: %d" % (self.start_position) + if self.end_position: + s += ", end_position: %d" % (self.end_position) + if self.is_impossible: + s += ", is_impossible: %r" % (self.is_impossible) + return s + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, + unique_id, + example_index, + doc_span_index, + tokens, + token_to_orig_map, + token_is_max_context, + input_ids, + input_mask, + segment_ids, + start_position=None, + end_position=None, + is_impossible=None): + self.unique_id = unique_id + self.example_index = example_index + self.doc_span_index = doc_span_index + self.tokens = tokens + self.token_to_orig_map = token_to_orig_map + self.token_is_max_context = token_is_max_context + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + +def read_squad_examples(input_file, is_training, version_2_with_negative): + """Read a SQuAD json file into a list of SquadExample.""" + with open(input_file, "r", encoding='utf-8') as reader: + input_data = json.load(reader)["data"] + + def is_whitespace(c): + if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: + return True + return False + + examples = [] + for entry in input_data: + for paragraph in entry["paragraphs"]: + paragraph_text = paragraph["context"] + doc_tokens = [] + char_to_word_offset = [] + prev_is_whitespace = True + for c in paragraph_text: + if is_whitespace(c): + prev_is_whitespace = True + else: + if prev_is_whitespace: + doc_tokens.append(c) + else: + doc_tokens[-1] += c + prev_is_whitespace = False + char_to_word_offset.append(len(doc_tokens) - 1) + + for qa in paragraph["qas"]: + qas_id = qa["id"] + question_text = qa["question"] + start_position = None + end_position = None + orig_answer_text = None + is_impossible = False + if is_training: + if version_2_with_negative: + 
is_impossible = qa["is_impossible"] + if (len(qa["answers"]) != 1) and (not is_impossible): + raise ValueError( + "For training, each question should have exactly 1 answer.") + if not is_impossible: + answer = qa["answers"][0] + orig_answer_text = answer["text"] + answer_offset = answer["answer_start"] + answer_length = len(orig_answer_text) + start_position = char_to_word_offset[answer_offset] + end_position = char_to_word_offset[answer_offset + answer_length - 1] + # Only add answers where the text can be exactly recovered from the + # document. If this CAN'T happen it's likely due to weird Unicode + # stuff so we will just skip the example. + # + # Note that this means for training mode, every example is NOT + # guaranteed to be preserved. + actual_text = " ".join(doc_tokens[start_position:(end_position + 1)]) + cleaned_answer_text = " ".join( + whitespace_tokenize(orig_answer_text)) + if actual_text.find(cleaned_answer_text) == -1: + logger.warning("Could not find answer: '%s' vs. '%s'", + actual_text, cleaned_answer_text) + continue + else: + start_position = -1 + end_position = -1 + orig_answer_text = "" + + example = SquadExample( + qas_id=qas_id, + question_text=question_text, + doc_tokens=doc_tokens, + orig_answer_text=orig_answer_text, + start_position=start_position, + end_position=end_position, + is_impossible=is_impossible) + examples.append(example) + return examples + + +def convert_examples_to_features(examples, tokenizer, max_seq_length, + doc_stride, max_query_length, is_training): + """Loads a data file into a list of `InputBatch`s.""" + + unique_id = 1000000000 + + features = [] + for (example_index, example) in enumerate(examples): + query_tokens = tokenizer.tokenize(example.question_text) + + if len(query_tokens) > max_query_length: + query_tokens = query_tokens[0:max_query_length] + + tok_to_orig_index = [] + orig_to_tok_index = [] + all_doc_tokens = [] + for (i, token) in enumerate(example.doc_tokens): + orig_to_tok_index.append(len(all_doc_tokens)) + sub_tokens = tokenizer.tokenize(token) + for sub_token in sub_tokens: + tok_to_orig_index.append(i) + all_doc_tokens.append(sub_token) + + tok_start_position = None + tok_end_position = None + if is_training and example.is_impossible: + tok_start_position = -1 + tok_end_position = -1 + if is_training and not example.is_impossible: + tok_start_position = orig_to_tok_index[example.start_position] + if example.end_position < len(example.doc_tokens) - 1: + tok_end_position = orig_to_tok_index[example.end_position + 1] - 1 + else: + tok_end_position = len(all_doc_tokens) - 1 + (tok_start_position, tok_end_position) = _improve_answer_span( + all_doc_tokens, tok_start_position, tok_end_position, tokenizer, + example.orig_answer_text) + + # The -3 accounts for [CLS], [SEP] and [SEP] + max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 + + # We can have documents that are longer than the maximum sequence length. + # To deal with this we do a sliding window approach, where we take chunks + # of the up to our max length with a stride of `doc_stride`. 
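Note: the sliding window described above is easy to check in isolation. With 10 document tokens, a 6-token budget, and a stride of 3, it yields three overlapping spans; a standalone sketch:

    import collections

    DocSpan = collections.namedtuple("DocSpan", ["start", "length"])

    def enumerate_doc_spans(num_doc_tokens, max_tokens_for_doc, doc_stride):
        # same loop as the _DocSpan construction that follows the comment above
        doc_spans, start_offset = [], 0
        while start_offset < num_doc_tokens:
            length = min(num_doc_tokens - start_offset, max_tokens_for_doc)
            doc_spans.append(DocSpan(start=start_offset, length=length))
            if start_offset + length == num_doc_tokens:
                break
            start_offset += min(length, doc_stride)
        return doc_spans

    print(enumerate_doc_spans(10, max_tokens_for_doc=6, doc_stride=3))
    # -> [DocSpan(start=0, length=6), DocSpan(start=3, length=6), DocSpan(start=6, length=4)]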
+ _DocSpan = collections.namedtuple( # pylint: disable=invalid-name + "DocSpan", ["start", "length"]) + doc_spans = [] + start_offset = 0 + while start_offset < len(all_doc_tokens): + length = len(all_doc_tokens) - start_offset + if length > max_tokens_for_doc: + length = max_tokens_for_doc + doc_spans.append(_DocSpan(start=start_offset, length=length)) + if start_offset + length == len(all_doc_tokens): + break + start_offset += min(length, doc_stride) + + for (doc_span_index, doc_span) in enumerate(doc_spans): + tokens = [] + token_to_orig_map = {} + token_is_max_context = {} + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in query_tokens: + tokens.append(token) + segment_ids.append(0) + tokens.append("[SEP]") + segment_ids.append(0) + + for i in range(doc_span.length): + split_token_index = doc_span.start + i + token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] + + is_max_context = _check_is_max_context(doc_spans, doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + start_position = None + end_position = None + if is_training and not example.is_impossible: + # For training, if our document chunk does not contain an annotation + # we throw it out, since there is nothing to predict. 
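Note on the assembly above, before the labeling step that follows: each span is packed as [CLS] question [SEP] chunk [SEP], zero-padded to max_seq_length, with segment id 0 for the question half and 1 for the document half. A sketch with a made-up five-entry vocabulary (it assumes the pieces already fit in the budget):

    def build_feature(query_tokens, span_tokens, max_seq_length, token_to_id):
        # mirrors the tokens / segment_ids / input_mask assembly above
        tokens = ["[CLS]"] + query_tokens + ["[SEP]"] + span_tokens + ["[SEP]"]
        segment_ids = [0] * (len(query_tokens) + 2) + [1] * (len(span_tokens) + 1)
        input_ids = [token_to_id[t] for t in tokens]
        input_mask = [1] * len(input_ids)
        while len(input_ids) < max_seq_length:       # zero-pad to the fixed length
            input_ids.append(0)
            input_mask.append(0)
            segment_ids.append(0)
        return input_ids, input_mask, segment_ids

    vocab = {"[CLS]": 101, "[SEP]": 102, "who": 8, "won": 9, "france": 5}
    ids, mask, segs = build_feature(["who", "won"], ["france", "won"], 10, vocab)
    print(ids)   # -> [101, 8, 9, 102, 5, 9, 102, 0, 0, 0]
    print(segs)  # -> [0, 0, 0, 0, 1, 1, 1, 0, 0, 0]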
+                doc_start = doc_span.start
+                doc_end = doc_span.start + doc_span.length - 1
+                out_of_span = False
+                if not (tok_start_position >= doc_start and
+                        tok_end_position <= doc_end):
+                    out_of_span = True
+                if out_of_span:
+                    start_position = 0
+                    end_position = 0
+                else:
+                    doc_offset = len(query_tokens) + 2
+                    start_position = tok_start_position - doc_start + doc_offset
+                    end_position = tok_end_position - doc_start + doc_offset
+            if is_training and example.is_impossible:
+                start_position = 0
+                end_position = 0
+            if example_index < 20:
+                logger.info("*** Example ***")
+                logger.info("unique_id: %s" % (unique_id))
+                logger.info("example_index: %s" % (example_index))
+                logger.info("doc_span_index: %s" % (doc_span_index))
+                logger.info("tokens: %s" % " ".join(tokens))
+                logger.info("token_to_orig_map: %s" % " ".join([
+                    "%d:%d" % (x, y) for (x, y) in token_to_orig_map.items()]))
+                logger.info("token_is_max_context: %s" % " ".join([
+                    "%d:%s" % (x, y) for (x, y) in token_is_max_context.items()
+                ]))
+                logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+                logger.info(
+                    "input_mask: %s" % " ".join([str(x) for x in input_mask]))
+                logger.info(
+                    "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+                if is_training and example.is_impossible:
+                    logger.info("impossible example")
+                if is_training and not example.is_impossible:
+                    answer_text = " ".join(tokens[start_position:(end_position + 1)])
+                    logger.info("start_position: %d" % (start_position))
+                    logger.info("end_position: %d" % (end_position))
+                    logger.info(
+                        "answer: %s" % (answer_text))
+
+            features.append(
+                InputFeatures(
+                    unique_id=unique_id,
+                    example_index=example_index,
+                    doc_span_index=doc_span_index,
+                    tokens=tokens,
+                    token_to_orig_map=token_to_orig_map,
+                    token_is_max_context=token_is_max_context,
+                    input_ids=input_ids,
+                    input_mask=input_mask,
+                    segment_ids=segment_ids,
+                    start_position=start_position,
+                    end_position=end_position,
+                    is_impossible=example.is_impossible))
+            unique_id += 1
+
+    return features
+
+
+def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
+                         orig_answer_text):
+    """Returns tokenized answer spans that better match the annotated answer."""
+
+    # The SQuAD annotations are character based. We first project them to
+    # whitespace-tokenized words. But then after WordPiece tokenization, we can
+    # often find a "better match". For example:
+    #
+    #   Question: What year was John Smith born?
+    #   Context: The leader was John Smith (1895-1943).
+    #   Answer: 1895
+    #
+    # The original whitespace-tokenized answer will be "(1895-1943).". However
+    # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
+    # the exact answer, 1895.
+    #
+    # However, this is not always possible. Consider the following:
+    #
+    #   Question: What country is the top exporter of electronics?
+    #   Context: The Japanese electronics industry is the largest in the world.
+    #   Answer: Japan
+    #
+    # In this case, the annotator chose "Japan" as a character sub-span of
+    # the word "Japanese". Since our WordPiece tokenizer does not split
+    # "Japanese", we just use "Japanese" as the annotation. This is fairly rare
+    # in SQuAD, but does happen.
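Note: taken together, the helpers above can be smoke-tested end to end. A sketch, assuming this file is importable as utils_squad and pytorch_transformers is installed (from_pretrained downloads the vocabulary on first use):

    import json, tempfile
    from pytorch_transformers import BertTokenizer
    from utils_squad import read_squad_examples, convert_examples_to_features

    squad_like = {"data": [{"title": "Smith", "paragraphs": [{
        "context": "The leader was John Smith (1895-1943).",
        "qas": [{"id": "q1", "question": "What year was John Smith born?",
                 "answers": [{"text": "1895", "answer_start": 27}]}]}]}]}

    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump(squad_like, f)

    examples = read_squad_examples(f.name, is_training=True, version_2_with_negative=False)
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
    features = convert_examples_to_features(examples, tokenizer, max_seq_length=64,
                                            doc_stride=32, max_query_length=16,
                                            is_training=True)
    print(len(features), features[0].start_position, features[0].end_position)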
+ tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) + + for new_start in range(input_start, input_end + 1): + for new_end in range(input_end, new_start - 1, -1): + text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) + if text_span == tok_answer_text: + return (new_start, new_end) + + return (input_start, input_end) + + +def _check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token.""" + + # Because of the sliding window approach taken to scoring documents, a single + # token can appear in multiple documents. E.g. + # Doc: the man went to the store and bought a gallon of milk + # Span A: the man went to the + # Span B: to the store and bought + # Span C: and bought a gallon of + # ... + # + # Now the word 'bought' will have two scores from spans B and C. We only + # want to consider the score with "maximum context", which we define as + # the *minimum* of its left and right context (the *sum* of left and + # right context will always be the same, of course). + # + # In the example the maximum context for 'bought' would be span C since + # it has 1 left context and 3 right context, while span B has 4 left context + # and 0 right context. + best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, num_right_context) + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index + + +RawResult = collections.namedtuple("RawResult", + ["unique_id", "start_logits", "end_logits"]) + + +def write_predictions(all_examples, all_features, all_results, n_best_size, + max_answer_length, do_lower_case, output_prediction_file, + output_nbest_file, output_null_log_odds_file, verbose_logging, + version_2_with_negative, null_score_diff_threshold): + """Write final predictions to the json file and log-odds of null if needed.""" + logger.info("Writing predictions to: %s" % (output_prediction_file)) + logger.info("Writing nbest to: %s" % (output_nbest_file)) + + example_index_to_features = collections.defaultdict(list) + for feature in all_features: + example_index_to_features[feature.example_index].append(feature) + + unique_id_to_result = {} + for result in all_results: + unique_id_to_result[result.unique_id] = result + + _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name + "PrelimPrediction", + ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]) + + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + scores_diff_json = collections.OrderedDict() + + for (example_index, example) in enumerate(all_examples): + features = example_index_to_features[example_index] + + prelim_predictions = [] + # keep track of the minimum score of null start+end of position 0 + score_null = 1000000 # large and positive + min_null_feature_index = 0 # the paragraph slice with min null score + null_start_logit = 0 # the start logit at the slice with min null score + null_end_logit = 0 # the end logit at the slice with min null score + for (feature_index, feature) in enumerate(features): + result = unique_id_to_result[feature.unique_id] + start_indexes = _get_best_indexes(result.start_logits, 
n_best_size) + end_indexes = _get_best_indexes(result.end_logits, n_best_size) + # if we could have irrelevant answers, get the min score of irrelevant + if version_2_with_negative: + feature_null_score = result.start_logits[0] + result.end_logits[0] + if feature_null_score < score_null: + score_null = feature_null_score + min_null_feature_index = feature_index + null_start_logit = result.start_logits[0] + null_end_logit = result.end_logits[0] + for start_index in start_indexes: + for end_index in end_indexes: + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. + if start_index >= len(feature.tokens): + continue + if end_index >= len(feature.tokens): + continue + if start_index not in feature.token_to_orig_map: + continue + if end_index not in feature.token_to_orig_map: + continue + if not feature.token_is_max_context.get(start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > max_answer_length: + continue + prelim_predictions.append( + _PrelimPrediction( + feature_index=feature_index, + start_index=start_index, + end_index=end_index, + start_logit=result.start_logits[start_index], + end_logit=result.end_logits[end_index])) + if version_2_with_negative: + prelim_predictions.append( + _PrelimPrediction( + feature_index=min_null_feature_index, + start_index=0, + end_index=0, + start_logit=null_start_logit, + end_logit=null_end_logit)) + prelim_predictions = sorted( + prelim_predictions, + key=lambda x: (x.start_logit + x.end_logit), + reverse=True) + + _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name + "NbestPrediction", ["text", "start_logit", "end_logit"]) + + seen_predictions = {} + nbest = [] + for pred in prelim_predictions: + if len(nbest) >= n_best_size: + break + feature = features[pred.feature_index] + if pred.start_index > 0: # this is a non-null prediction + tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + orig_doc_start = feature.token_to_orig_map[pred.start_index] + orig_doc_end = feature.token_to_orig_map[pred.end_index] + orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + tok_text = " ".join(tok_tokens) + + # De-tokenize WordPieces that have been split off. + tok_text = tok_text.replace(" ##", "") + tok_text = tok_text.replace("##", "") + + # Clean whitespace + tok_text = tok_text.strip() + tok_text = " ".join(tok_text.split()) + orig_text = " ".join(orig_tokens) + + final_text = get_final_text(tok_text, orig_text, do_lower_case, verbose_logging) + if final_text in seen_predictions: + continue + + seen_predictions[final_text] = True + else: + final_text = "" + seen_predictions[final_text] = True + + nbest.append( + _NbestPrediction( + text=final_text, + start_logit=pred.start_logit, + end_logit=pred.end_logit)) + # if we didn't include the empty option in the n-best, include it + if version_2_with_negative: + if "" not in seen_predictions: + nbest.append( + _NbestPrediction( + text="", + start_logit=null_start_logit, + end_logit=null_end_logit)) + + # In very rare edge cases we could only have single null prediction. + # So we just create a nonce prediction in this case to avoid failure. + if len(nbest)==1: + nbest.insert(0, + _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + + # In very rare edge cases we could have no valid predictions. So we + # just create a nonce prediction in this case to avoid failure. 
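Note: the token_is_max_context filter above implements the minimum-of-left-and-right-context rule spelled out earlier in this file. The 'bought' example from that comment can be checked numerically; the span starts and lengths below follow it:

    import collections

    DocSpan = collections.namedtuple("DocSpan", ["start", "length"])

    def check_is_max_context(doc_spans, cur_span_index, position):
        # same scoring as _check_is_max_context above
        best_score, best_span_index = None, None
        for span_index, doc_span in enumerate(doc_spans):
            end = doc_span.start + doc_span.length - 1
            if position < doc_span.start or position > end:
                continue
            num_left = position - doc_span.start
            num_right = end - position
            score = min(num_left, num_right) + 0.01 * doc_span.length
            if best_score is None or score > best_score:
                best_score, best_span_index = score, span_index
        return cur_span_index == best_span_index

    # Doc: the man went to the store and bought a gallon of milk
    # Span B covers tokens 3..7, span C covers tokens 6..10; 'bought' is token 7.
    spans = [DocSpan(0, 5), DocSpan(3, 5), DocSpan(6, 5)]
    print(check_is_max_context(spans, 1, 7))  # Span B: 4 left, 0 right -> False
    print(check_is_max_context(spans, 2, 7))  # Span C: 1 left, 3 right -> True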
+ if not nbest: + nbest.append( + _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + + assert len(nbest) >= 1 + + total_scores = [] + best_non_null_entry = None + for entry in nbest: + total_scores.append(entry.start_logit + entry.end_logit) + if not best_non_null_entry: + if entry.text: + best_non_null_entry = entry + + probs = _compute_softmax(total_scores) + + nbest_json = [] + for (i, entry) in enumerate(nbest): + output = collections.OrderedDict() + output["text"] = entry.text + output["probability"] = probs[i] + output["start_logit"] = entry.start_logit + output["end_logit"] = entry.end_logit + nbest_json.append(output) + + assert len(nbest_json) >= 1 + + if not version_2_with_negative: + all_predictions[example.qas_id] = nbest_json[0]["text"] + else: + # predict "" iff the null score - the score of best non-null > threshold + score_diff = score_null - best_non_null_entry.start_logit - ( + best_non_null_entry.end_logit) + scores_diff_json[example.qas_id] = score_diff + if score_diff > null_score_diff_threshold: + all_predictions[example.qas_id] = "" + else: + all_predictions[example.qas_id] = best_non_null_entry.text + all_nbest_json[example.qas_id] = nbest_json + + with open(output_prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") + + with open(output_nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + "\n") + + if version_2_with_negative: + with open(output_null_log_odds_file, "w") as writer: + writer.write(json.dumps(scores_diff_json, indent=4) + "\n") + + return all_predictions + + +def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): + """Project the tokenized prediction back to the original text.""" + + # When we created the data, we kept track of the alignment between original + # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So + # now `orig_text` contains the span of our original text corresponding to the + # span that we predicted. + # + # However, `orig_text` may contain extra characters that we don't want in + # our prediction. + # + # For example, let's say: + # pred_text = steve smith + # orig_text = Steve Smith's + # + # We don't want to return `orig_text` because it contains the extra "'s". + # + # We don't want to return `pred_text` because it's already been normalized + # (the SQuAD eval script also does punctuation stripping/lower casing but + # our tokenizer does additional normalization like stripping accent + # characters). + # + # What we really want to return is "Steve Smith". + # + # Therefore, we have to apply a semi-complicated alignment heuristic between + # `pred_text` and `orig_text` to get a character-to-character alignment. This + # can fail in certain cases in which case we just return `orig_text`. + + def _strip_spaces(text): + ns_chars = [] + ns_to_s_map = collections.OrderedDict() + for (i, c) in enumerate(text): + if c == " ": + continue + ns_to_s_map[len(ns_chars)] = i + ns_chars.append(c) + ns_text = "".join(ns_chars) + return (ns_text, ns_to_s_map) + + # We first tokenize `orig_text`, strip whitespace from the result + # and `pred_text`, and check if they are the same length. If they are + # NOT the same length, the heuristic has failed. If they are the same + # length, we assume the characters are one-to-one aligned. 
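Note, stepping back to the scoring above: the n-best probabilities come from _compute_softmax, which subtracts the maximum logit before exponentiating so that math.exp cannot overflow on large logits. A minimal equivalent, with invented scores:

    import math

    def softmax(scores):
        # numerically stable softmax, as _compute_softmax below implements it
        if not scores:
            return []
        max_score = max(scores)
        exp_scores = [math.exp(s - max_score) for s in scores]
        total = sum(exp_scores)
        return [e / total for e in exp_scores]

    probs = softmax([5.2, 3.4, 2.6])
    print(probs)        # -> roughly [0.81, 0.13, 0.06]; the values sum to ~1.0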
+ tokenizer = BasicTokenizer(do_lower_case=do_lower_case) + + tok_text = " ".join(tokenizer.tokenize(orig_text)) + + start_position = tok_text.find(pred_text) + if start_position == -1: + if verbose_logging: + logger.info( + "Unable to find text: '%s' in '%s'" % (pred_text, orig_text)) + return orig_text + end_position = start_position + len(pred_text) - 1 + + (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) + (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) + + if len(orig_ns_text) != len(tok_ns_text): + if verbose_logging: + logger.info("Length not equal after stripping spaces: '%s' vs '%s'", + orig_ns_text, tok_ns_text) + return orig_text + + # We then project the characters in `pred_text` back to `orig_text` using + # the character-to-character alignment. + tok_s_to_ns_map = {} + for (i, tok_index) in tok_ns_to_s_map.items(): + tok_s_to_ns_map[tok_index] = i + + orig_start_position = None + if start_position in tok_s_to_ns_map: + ns_start_position = tok_s_to_ns_map[start_position] + if ns_start_position in orig_ns_to_s_map: + orig_start_position = orig_ns_to_s_map[ns_start_position] + + if orig_start_position is None: + if verbose_logging: + logger.info("Couldn't map start position") + return orig_text + + orig_end_position = None + if end_position in tok_s_to_ns_map: + ns_end_position = tok_s_to_ns_map[end_position] + if ns_end_position in orig_ns_to_s_map: + orig_end_position = orig_ns_to_s_map[ns_end_position] + + if orig_end_position is None: + if verbose_logging: + logger.info("Couldn't map end position") + return orig_text + + output_text = orig_text[orig_start_position:(orig_end_position + 1)] + return output_text + + +def _get_best_indexes(logits, n_best_size): + """Get the n-best logits from a list.""" + index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) + + best_indexes = [] + for i in range(len(index_and_score)): + if i >= n_best_size: + break + best_indexes.append(index_and_score[i][0]) + return best_indexes + + +def _compute_softmax(scores): + """Compute softmax probability over raw logits.""" + if not scores: + return [] + + max_score = None + for score in scores: + if max_score is None or score > max_score: + max_score = score + + exp_scores = [] + total_sum = 0.0 + for score in scores: + x = math.exp(score - max_score) + exp_scores.append(x) + total_sum += x + + probs = [] + for score in exp_scores: + probs.append(score / total_sum) + return probs From 9a9b1a5913e979f467f11604f7840948a308e861 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Sun, 14 Jul 2019 15:16:00 +0200 Subject: [PATCH 04/43] adapt utils_squad for cdqa --- cdqa/reader/utils_squad.py | 56 ++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py index 305eeb7b..27fafa02 100644 --- a/cdqa/reader/utils_squad.py +++ b/cdqa/reader/utils_squad.py @@ -42,7 +42,9 @@ def __init__(self, orig_answer_text=None, start_position=None, end_position=None, - is_impossible=None): + is_impossible=None, + paragraph=None, + title=None): self.qas_id = qas_id self.question_text = question_text self.doc_tokens = doc_tokens @@ -50,6 +52,8 @@ def __init__(self, self.start_position = start_position self.end_position = end_position self.is_impossible = is_impossible + self.paragraph = paragraph + self.title = title def __str__(self): return self.__repr__() @@ -101,8 +105,12 @@ def __init__(self, def read_squad_examples(input_file, 
is_training, version_2_with_negative): """Read a SQuAD json file into a list of SquadExample.""" - with open(input_file, "r", encoding='utf-8') as reader: - input_data = json.load(reader)["data"] + + if isinstance(input_file, str): + with open(input_file, "r", encoding='utf-8') as reader: + input_data = json.load(reader)["data"] + else: + input_data = input_file def is_whitespace(c): if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: @@ -172,13 +180,15 @@ def is_whitespace(c): orig_answer_text=orig_answer_text, start_position=start_position, end_position=end_position, - is_impossible=is_impossible) + is_impossible=is_impossible, + paragraph=paragraph_text, + title=entry["title"]) examples.append(example) return examples def convert_examples_to_features(examples, tokenizer, max_seq_length, - doc_stride, max_query_length, is_training): + doc_stride, max_query_length, is_training, verbose): """Loads a data file into a list of `InputBatch`s.""" unique_id = 1000000000 @@ -296,7 +306,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, if is_training and example.is_impossible: start_position = 0 end_position = 0 - if example_index < 20: + if example_index < 20 and verbose: logger.info("*** Example ***") logger.info("unique_id: %s" % (unique_id)) logger.info("example_index: %s" % (example_index)) @@ -423,8 +433,10 @@ def write_predictions(all_examples, all_features, all_results, n_best_size, output_nbest_file, output_null_log_odds_file, verbose_logging, version_2_with_negative, null_score_diff_threshold): """Write final predictions to the json file and log-odds of null if needed.""" - logger.info("Writing predictions to: %s" % (output_prediction_file)) - logger.info("Writing nbest to: %s" % (output_nbest_file)) + + if verbose_logging: + logger.info("Writing predictions to: %s" % (output_prediction_file)) + logger.info("Writing nbest to: %s" % (output_nbest_file)) example_index_to_features = collections.defaultdict(list) for feature in all_features: @@ -441,6 +453,7 @@ def write_predictions(all_examples, all_features, all_results, n_best_size, all_predictions = collections.OrderedDict() all_nbest_json = collections.OrderedDict() scores_diff_json = collections.OrderedDict() + final_predictions = collections.OrderedDict() for (example_index, example) in enumerate(all_examples): features = example_index_to_features[example_index] @@ -599,17 +612,32 @@ def write_predictions(all_examples, all_features, all_results, n_best_size, all_predictions[example.qas_id] = best_non_null_entry.text all_nbest_json[example.qas_id] = nbest_json - with open(output_prediction_file, "w") as writer: - writer.write(json.dumps(all_predictions, indent=4) + "\n") + final_predictions[example.qas_id] = nbest_json[0] + + final_predictions_sorted = collections.OrderedDict(sorted(final_predictions.items(), + key=lambda item: item[1]['start_logit'] + + item[1]['end_logit'], + reverse=True)) + + question_id = list(final_predictions_sorted.items())[0][0] + title = [e for e in all_examples if e.qas_id == question_id][0].title + paragraph = [e for e in all_examples if e.qas_id == question_id][0].paragraph + + final_prediction = list(final_predictions_sorted.items())[0][1]['text'], title, paragraph + + if output_prediction_file: + with open(output_prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") - with open(output_nbest_file, "w") as writer: - writer.write(json.dumps(all_nbest_json, indent=4) + "\n") + if output_nbest_file: + with 
open(output_nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + "\n") - if version_2_with_negative: + if version_2_with_negative and output_null_log_odds_file: with open(output_null_log_odds_file, "w") as writer: writer.write(json.dumps(scores_diff_json, indent=4) + "\n") - return all_predictions + return final_prediction, all_predictions, all_nbest_json, scores_diff_json def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): From 12af10bc9316c688b446546b162036e899f28d81 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Sun, 14 Jul 2019 15:21:40 +0200 Subject: [PATCH 05/43] add reader_sklearn from HF original run_squad.py --- cdqa/reader/reader_sklearn.py | 477 ++++++++++++++++++++++++++++++++++ 1 file changed, 477 insertions(+) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index e69de29b..af4a771f 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -0,0 +1,477 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Finetuning a question-answering model (Bert, XLM, XLNet,...) 
on SQuAD."""

from __future__ import absolute_import, division, print_function

import argparse
import glob
import logging
import os
import random
from io import open

import numpy as np
import torch
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset)
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

from tensorboardX import SummaryWriter

from pytorch_transformers import (WEIGHTS_NAME, BertConfig,
                                  BertForQuestionAnswering, BertTokenizer,
                                  XLMConfig, XLMForQuestionAnswering,
                                  XLMTokenizer, XLNetConfig,
                                  XLNetForQuestionAnswering,
                                  XLNetTokenizer)

from pytorch_transformers import AdamW, WarmupLinearSchedule

from utils_squad import read_squad_examples, convert_examples_to_features, RawResult, write_predictions

from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad

logger = logging.getLogger(__name__)

ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) \
                  for conf in (BertConfig, XLNetConfig, XLMConfig)), ())

MODEL_CLASSES = {
    'bert': (BertConfig, BertForQuestionAnswering, BertTokenizer),
    'xlnet': (XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer),
    'xlm': (XLMConfig, XLMForQuestionAnswering, XLMTokenizer),
}

def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)


def train(args, train_dataset, model, tokenizer):
    """ Train the model """
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter()

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size)
    logger.info("  Total train batch size (w.
parallel, distributed & accumulation) = %d", + args.train_batch_size * args.gradient_accumulation_steps * (torch.distributed.get_world_size() if args.local_rank != -1 else 1)) + logger.info(" Gradient Accumulation steps = %d", args.gradient_accumulation_steps) + logger.info(" Total optimization steps = %d", t_total) + + global_step = 0 + tr_loss, logging_loss = 0.0, 0.0 + model.zero_grad() + train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]) + set_seed(args) # Added here for reproductibility (even between python 2 and 3) + for _ in train_iterator: + epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0]) + for step, batch in enumerate(epoch_iterator): + model.train() + batch = tuple(t.to(args.device) for t in batch) + inputs = {'input_ids': batch[0], + 'token_type_ids': batch[1] if args.model_type in ['bert', 'xlnet'] else None, # XLM don't use segment_ids + 'attention_mask': batch[2], + 'start_positions': batch[3], + 'end_positions': batch[4]} + ouputs = model(**inputs) + loss = ouputs[0] # model outputs are always tuple in pytorch-transformers (see doc) + + if args.n_gpu > 1: + loss = loss.mean() # mean() to average on multi-gpu parallel training + if args.gradient_accumulation_steps > 1: + loss = loss / args.gradient_accumulation_steps + + if args.fp16: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm) + else: + loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) + + tr_loss += loss.item() + if (step + 1) % args.gradient_accumulation_steps == 0: + scheduler.step() # Update learning rate schedule + optimizer.step() + model.zero_grad() + global_step += 1 + + if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0: + # Log metrics + if args.local_rank == -1 and args.evaluate_during_training: # Only evaluate when single GPU otherwise metrics may not average well + results = evaluate(args, model, tokenizer) + for key, value in results.items(): + tb_writer.add_scalar('eval_{}'.format(key), value, global_step) + tb_writer.add_scalar('lr', scheduler.get_lr()[0], global_step) + tb_writer.add_scalar('loss', (tr_loss - logging_loss)/args.logging_steps, global_step) + logging_loss = tr_loss + + if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0: + # Save model checkpoint + output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step)) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training + model_to_save.save_pretrained(output_dir) + torch.save(args, os.path.join(output_dir, 'training_args.bin')) + logger.info("Saving model checkpoint to %s", output_dir) + + if args.max_steps > 0 and global_step > args.max_steps: + epoch_iterator.close() + break + if args.max_steps > 0 and global_step > args.max_steps: + train_iterator.close() + break + + return global_step, tr_loss / global_step + + +def evaluate(args, model, tokenizer, prefix=""): + dataset, examples, features = load_and_cache_examples(args, tokenizer, evaluate=True, output_examples=True) + + if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: + os.makedirs(args.output_dir) + + args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu) + # Note that 
DistributedSampler samples randomly + eval_sampler = SequentialSampler(dataset) if args.local_rank == -1 else DistributedSampler(dataset) + eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) + + # Eval! + logger.info("***** Running evaluation {} *****".format(prefix)) + logger.info(" Num examples = %d", len(dataset)) + logger.info(" Batch size = %d", args.eval_batch_size) + all_results = [] + for batch in tqdm(eval_dataloader, desc="Evaluating"): + model.eval() + batch = tuple(t.to(args.device) for t in batch) + example_indices = batch[3] + with torch.no_grad(): + inputs = {'input_ids': batch[0], + 'token_type_ids': batch[1] if args.model_type in ['bert', 'xlnet'] else None, # XLM don't use segment_ids + 'attention_mask': batch[2]} + outputs = model(**inputs) + batch_start_logits, batch_end_logits = outputs[:2] + + for i, example_index in enumerate(example_indices): + start_logits = batch_start_logits[i].detach().cpu().tolist() + end_logits = batch_end_logits[i].detach().cpu().tolist() + eval_feature = features[example_index.item()] + unique_id = int(eval_feature.unique_id) + all_results.append(RawResult(unique_id=unique_id, + start_logits=start_logits, + end_logits=end_logits)) + + output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix)) + output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix)) + output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix)) + all_predictions = write_predictions(examples, features, all_results, + args.n_best_size, args.max_answer_length, + args.do_lower_case, output_prediction_file, + output_nbest_file, output_null_log_odds_file, + args.verbose_logging, args.version_2_with_negative, + args.null_score_diff_threshold) + + evaluate_options = EVAL_OPTS(data_file=args.predict_file, + pred_file=output_prediction_file, + na_prob_file=output_null_log_odds_file) + results = evaluate_on_squad(evaluate_options) + return results + + +def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False): + # Load data features from cache or dataset file + input_file = args.predict_file if evaluate else args.train_file + cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format( + 'dev' if evaluate else 'train', + list(filter(None, args.model_name.split('/'))).pop(), + str(args.max_seq_length))) + if os.path.exists(cached_features_file) and not args.overwrite_cache and not output_examples: + logger.info("Loading features from cached file %s", cached_features_file) + features = torch.load(cached_features_file) + else: + logger.info("Creating features from dataset file at %s", input_file) + examples = read_squad_examples(input_file=input_file, + is_training=not evaluate, + version_2_with_negative=args.version_2_with_negative) + features = convert_examples_to_features(examples=examples, + tokenizer=tokenizer, + max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, + max_query_length=args.max_query_length, + is_training=not evaluate) + if args.local_rank in [-1, 0]: + logger.info("Saving features into cached file %s", cached_features_file) + torch.save(features, cached_features_file) + + # Convert to Tensors and build dataset + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long) + all_segment_ids = torch.tensor([f.segment_ids for f in features], 
dtype=torch.long) + if evaluate: + all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long) + dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index) + else: + all_start_positions = torch.tensor([f.start_position for f in features], dtype=torch.long) + all_end_positions = torch.tensor([f.end_position for f in features], dtype=torch.long) + dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_start_positions, all_end_positions) + + if output_examples: + return dataset, examples, features + return dataset + + +def main(): + parser = argparse.ArgumentParser() + + ## Required parameters + parser.add_argument("--train_file", default=None, type=str, required=True, + help="SQuAD json for training. E.g., train-v1.1.json") + parser.add_argument("--predict_file", default=None, type=str, required=True, + help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json") + parser.add_argument("--model_name", default=None, type=str, required=True, + help="Bert/XLNet/XLM pre-trained model selected in the list: " + ", ".join(ALL_MODELS)) + parser.add_argument("--output_dir", default=None, type=str, required=True, + help="The output directory where the model checkpoints and predictions will be written.") + + ## Other parameters + parser.add_argument("--config_name", default="", type=str, + help="Pretrained config name or path if not the same as model_name") + parser.add_argument("--tokenizer_name", default="", type=str, + help="Pretrained tokenizer name or path if not the same as model_name") + parser.add_argument("--cache_dir", default="", type=str, + help="Where do you want to store the pre-trained models downloaded from s3") + + parser.add_argument('--version_2_with_negative', action='store_true', + help='If true, the SQuAD examples contain some that do not have an answer.') + parser.add_argument('--null_score_diff_threshold', type=float, default=0.0, + help="If null_score - best_non_null is greater than the threshold predict null.") + + parser.add_argument("--max_seq_length", default=384, type=int, + help="The maximum total input sequence length after WordPiece tokenization. Sequences " + "longer than this will be truncated, and sequences shorter than this will be padded.") + parser.add_argument("--doc_stride", default=128, type=int, + help="When splitting up a long document into chunks, how much stride to take between chunks.") + parser.add_argument("--max_query_length", default=64, type=int, + help="The maximum number of tokens for the question. 
Questions longer than this will "
                             "be truncated to this length.")
    parser.add_argument("--do_train", action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--evaluate_during_training", action='store_true',
                        help="Run evaluation during training at each logging step.")
    parser.add_argument("--do_lower_case", action='store_true',
                        help="Set this flag if you are using an uncased model.")

    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of update steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=3.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--n_best_size", default=20, type=int,
                        help="The total number of n-best predictions to generate in the nbest_predictions.json output file.")
    parser.add_argument("--max_answer_length", default=30, type=int,
                        help="The maximum length of an answer that can be generated. This is needed because the start "
                             "and end predictions are not conditioned on one another.")
    parser.add_argument("--verbose_logging", action='store_true',
                        help="If true, all of the warnings related to data processing will be printed. 
" + "A number of warnings are expected for a normal SQuAD evaluation.") + + parser.add_argument('--logging_steps', type=int, default=50, + help="Log every X updates steps.") + parser.add_argument('--save_steps', type=int, default=50, + help="Save checkpoint every X updates steps.") + parser.add_argument("--eval_all_checkpoints", action='store_true', + help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number") + parser.add_argument("--no_cuda", action='store_true', + help="Whether not to use CUDA when available") + parser.add_argument('--overwrite_output_dir', action='store_true', + help="Overwrite the content of the output directory") + parser.add_argument('--overwrite_cache', action='store_true', + help="Overwrite the cached training and evaluation sets") + parser.add_argument('--seed', type=int, default=42, + help="random seed for initialization") + + parser.add_argument("--local_rank", type=int, default=-1, + help="local_rank for distributed training on gpus") + parser.add_argument('--fp16', action='store_true', + help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit") + parser.add_argument('--fp16_opt_level', type=str, default='O1', + help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." + "See details at https://nvidia.github.io/apex/amp.html") + parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.") + parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.") + args = parser.parse_args() + + if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir: + raise ValueError("Output directory ({}) already exists and is not empty. 
Use --overwrite_output_dir to overcome.".format(args.output_dir)) + + # Setup distant debugging if needed + if args.server_ip and args.server_port: + # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script + import ptvsd + print("Waiting for debugger attach") + ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True) + ptvsd.wait_for_attach() + + # Setup CUDA, GPU & distributed training + if args.local_rank == -1 or args.no_cuda: + device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") + args.n_gpu = torch.cuda.device_count() + else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs + torch.cuda.set_device(args.local_rank) + device = torch.device("cuda", args.local_rank) + torch.distributed.init_process_group(backend='nccl') + args.n_gpu = 1 + args.device = device + + # Setup logging + logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.INFO if args.local_rank in [-1, 0] else logging.WARN) + logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", + args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16) + + # Set seed + set_seed(args) + + # Load pretrained model and tokenizer + if args.local_rank not in [-1, 0]: + torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab + + args.model_type = "" + for key in MODEL_CLASSES: + if key in args.model_name.lower(): + args.model_type = key # take the first match in model types + break + config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type] + config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name) + tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name, do_lower_case=args.do_lower_case) + model = model_class.from_pretrained(args.model_name, from_tf=bool('.ckpt' in args.model_name), config=config) + + if args.local_rank == 0: + torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab + + # Distributed and parrallel training + model.to(args.device) + if args.local_rank != -1: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], + output_device=args.local_rank, + find_unused_parameters=True) + elif args.n_gpu > 1: + model = torch.nn.DataParallel(model) + + logger.info("Training/evaluation parameters %s", args) + + # Training + if args.do_train: + train_dataset = load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False) + global_step, tr_loss = train(args, train_dataset, model, tokenizer) + logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) + + + # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained() + if args.local_rank == -1 or torch.distributed.get_rank() == 0: + # Create output directory if needed + if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: + os.makedirs(args.output_dir) + + logger.info("Saving model checkpoint to %s", args.output_dir) + # Save a trained model, configuration and tokenizer using `save_pretrained()`. 
+ # They can then be reloaded using `from_pretrained()` + model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training + model_to_save.save_pretrained(args.output_dir) + tokenizer.save_pretrained(args.output_dir) + + # Good practice: save your training arguments together with the trained model + torch.save(args, os.path.join(args.output_dir, 'training_args.bin')) + + # Load a trained model and vocabulary that you have fine-tuned + model = model_class.from_pretrained(args.output_dir) + tokenizer = tokenizer_class.from_pretrained(args.output_dir) + model.to(args.device) + + + # Evaluation + results = {} + if args.do_eval and args.local_rank in [-1, 0]: + checkpoints = [args.output_dir] + if args.eval_all_checkpoints: + checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True))) + logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN) # Reduce logging + logger.info("Evaluate the following checkpoints: %s", checkpoints) + for checkpoint in checkpoints: + global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else "" + model = model_class.from_pretrained(checkpoint) + model.to(args.device) + result = evaluate(args, model, tokenizer, prefix=global_step) + result = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in result.items()) + results.update(result) + logger.info("Results: {}".format(results)) + return results + + +if __name__ == "__main__": + main() From 1ba2db8980b1b64f82de49c1fba2ce1ff06ebb10 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Mon, 15 Jul 2019 13:31:43 +0200 Subject: [PATCH 06/43] foundations sklearn wrapper XLNet --- cdqa/reader/reader_sklearn.py | 396 +++++++++++++++++----------------- 1 file changed, 192 insertions(+), 204 deletions(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index af4a771f..03e3f729 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -267,211 +267,199 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal return dataset -def main(): - parser = argparse.ArgumentParser() - - ## Required parameters - parser.add_argument("--train_file", default=None, type=str, required=True, - help="SQuAD json for training. E.g., train-v1.1.json") - parser.add_argument("--predict_file", default=None, type=str, required=True, - help="SQuAD json for predictions. 
E.g., dev-v1.1.json or test-v1.1.json") - parser.add_argument("--model_name", default=None, type=str, required=True, - help="Bert/XLNet/XLM pre-trained model selected in the list: " + ", ".join(ALL_MODELS)) - parser.add_argument("--output_dir", default=None, type=str, required=True, - help="The output directory where the model checkpoints and predictions will be written.") - - ## Other parameters - parser.add_argument("--config_name", default="", type=str, - help="Pretrained config name or path if not the same as model_name") - parser.add_argument("--tokenizer_name", default="", type=str, - help="Pretrained tokenizer name or path if not the same as model_name") - parser.add_argument("--cache_dir", default="", type=str, - help="Where do you want to store the pre-trained models downloaded from s3") - - parser.add_argument('--version_2_with_negative', action='store_true', - help='If true, the SQuAD examples contain some that do not have an answer.') - parser.add_argument('--null_score_diff_threshold', type=float, default=0.0, - help="If null_score - best_non_null is greater than the threshold predict null.") - - parser.add_argument("--max_seq_length", default=384, type=int, - help="The maximum total input sequence length after WordPiece tokenization. Sequences " - "longer than this will be truncated, and sequences shorter than this will be padded.") - parser.add_argument("--doc_stride", default=128, type=int, - help="When splitting up a long document into chunks, how much stride to take between chunks.") - parser.add_argument("--max_query_length", default=64, type=int, - help="The maximum number of tokens for the question. Questions longer than this will " - "be truncated to this length.") - parser.add_argument("--do_train", action='store_true', - help="Whether to run training.") - parser.add_argument("--do_eval", action='store_true', - help="Whether to run eval on the dev set.") - parser.add_argument("--evaluate_during_training", action='store_true', - help="Rul evaluation during training at each logging step.") - parser.add_argument("--do_lower_case", action='store_true', - help="Set this flag if you are using an uncased model.") - - parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, - help="Batch size per GPU/CPU for training.") - parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int, - help="Batch size per GPU/CPU for evaluation.") - parser.add_argument("--learning_rate", default=5e-5, type=float, - help="The initial learning rate for Adam.") - parser.add_argument('--gradient_accumulation_steps', type=int, default=1, - help="Number of updates steps to accumulate before performing a backward/update pass.") - parser.add_argument("--weight_decay", default=0.0, type=float, - help="Weight deay if we apply some.") - parser.add_argument("--adam_epsilon", default=1e-8, type=float, - help="Epsilon for Adam optimizer.") - parser.add_argument("--max_grad_norm", default=1.0, type=float, - help="Max gradient norm.") - parser.add_argument("--num_train_epochs", default=3.0, type=float, - help="Total number of training epochs to perform.") - parser.add_argument("--max_steps", default=-1, type=int, - help="If > 0: set total number of training steps to perform. 
Override num_train_epochs.") - parser.add_argument("--warmup_steps", default=0, type=int, - help="Linear warmup over warmup_steps.") - parser.add_argument("--n_best_size", default=20, type=int, - help="The total number of n-best predictions to generate in the nbest_predictions.json output file.") - parser.add_argument("--max_answer_length", default=30, type=int, - help="The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another.") - parser.add_argument("--verbose_logging", action='store_true', - help="If true, all of the warnings related to data processing will be printed. " - "A number of warnings are expected for a normal SQuAD evaluation.") - - parser.add_argument('--logging_steps', type=int, default=50, - help="Log every X updates steps.") - parser.add_argument('--save_steps', type=int, default=50, - help="Save checkpoint every X updates steps.") - parser.add_argument("--eval_all_checkpoints", action='store_true', - help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number") - parser.add_argument("--no_cuda", action='store_true', - help="Whether not to use CUDA when available") - parser.add_argument('--overwrite_output_dir', action='store_true', - help="Overwrite the content of the output directory") - parser.add_argument('--overwrite_cache', action='store_true', - help="Overwrite the cached training and evaluation sets") - parser.add_argument('--seed', type=int, default=42, - help="random seed for initialization") - - parser.add_argument("--local_rank", type=int, default=-1, - help="local_rank for distributed training on gpus") - parser.add_argument('--fp16', action='store_true', - help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit") - parser.add_argument('--fp16_opt_level', type=str, default='O1', - help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." - "See details at https://nvidia.github.io/apex/amp.html") - parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.") - parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.") - args = parser.parse_args() - - if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir: - raise ValueError("Output directory ({}) already exists and is not empty. 
Use --overwrite_output_dir to overcome.".format(args.output_dir)) - - # Setup distant debugging if needed - if args.server_ip and args.server_port: - # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script - import ptvsd - print("Waiting for debugger attach") - ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True) - ptvsd.wait_for_attach() - - # Setup CUDA, GPU & distributed training - if args.local_rank == -1 or args.no_cuda: - device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() - else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs - torch.cuda.set_device(args.local_rank) - device = torch.device("cuda", args.local_rank) - torch.distributed.init_process_group(backend='nccl') - args.n_gpu = 1 - args.device = device - - # Setup logging - logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.INFO if args.local_rank in [-1, 0] else logging.WARN) - logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", - args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16) - - # Set seed - set_seed(args) - - # Load pretrained model and tokenizer - if args.local_rank not in [-1, 0]: - torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - - args.model_type = "" - for key in MODEL_CLASSES: - if key in args.model_name.lower(): - args.model_type = key # take the first match in model types - break - config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type] - config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name) - tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name, do_lower_case=args.do_lower_case) - model = model_class.from_pretrained(args.model_name, from_tf=bool('.ckpt' in args.model_name), config=config) - - if args.local_rank == 0: - torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - - # Distributed and parrallel training - model.to(args.device) - if args.local_rank != -1: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], - output_device=args.local_rank, - find_unused_parameters=True) - elif args.n_gpu > 1: - model = torch.nn.DataParallel(model) - - logger.info("Training/evaluation parameters %s", args) - - # Training - if args.do_train: - train_dataset = load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False) - global_step, tr_loss = train(args, train_dataset, model, tokenizer) - logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) +class Reader(BaseEstimator): + """ + """ + + def __init__(self, + train_file=None, + predict_file=None, + model_name=None, + output_dir=None, + config_name="", + tokenizer_name="", + cache_dir="", + version_2_with_negative=True, + null_score_diff_threshold=0.0, + max_seq_length=384, + doc_stride=128, + max_query_length=64, + do_train=True, + do_eval=True, + evaluate_during_training=True, + do_lower_case=True, + per_gpu_train_batch_size=8, + per_gpu_eval_batch_size=8, + learning_rate=5e-5, + gradient_accumulation_steps=1, + weight_decay=0.0, + adam_epsilon=1e-8, + max_grad_norm=1.0, + num_train_epochs=3.0, + 
max_steps=-1, + warmup_steps=0, + n_best_size=20, + max_answer_length=30, + verbose_logging=True, + logging_steps=50, + save_steps=50, + eval_all_checkpoints=True, + no_cuda=True, + overwrite_output_dir=True, + overwrite_cache=True, + seed=42, + local_rank=-1, + fp16=True, + fp16_opt_level='O1', + server_ip='', + server_port=''): + + self.train_file = train_file + self.predict_file = predict_file + self.model_name = model_name + self.output_dir = output_dir + self.config_name = config_name + self.tokenizer_name = tokenizer_name + self.cache_dir = cache_dir + self.version_2_with_negative = version_2_with_negative + self.null_score_diff_threshold = null_score_diff_threshold + self.max_seq_length = max_seq_length + self.doc_stride = doc_stride + self.max_query_length = max_query_length + self.do_train = do_train + self.do_eval = do_eval + self.evaluate_during_training = evaluate_during_training + self.do_lower_case = do_lower_case + self.per_gpu_train_batch_size = per_gpu_train_batch_size + self.per_gpu_eval_batch_size = per_gpu_eval_batch_size + self.learning_rate = learning_rate + self.gradient_accumulation_steps = gradient_accumulation_steps + self.weight_decay = weight_decay + self.adam_epsilon = adam_epsilon + self.max_grad_norm = max_grad_norm + self.num_train_epochs = num_train_epochs + self.max_steps = max_steps + self.warmup_steps = warmup_steps + self.n_best_size = n_best_size + self.max_answer_length = max_answer_length + self.verbose_logging = verbose_logging + self.logging_steps = logging_steps + self.save_steps = save_steps + self.eval_all_checkpoints = eval_all_checkpoints + self.no_cuda = no_cuda + self.overwrite_output_dir = overwrite_output_dir + self.overwrite_cache = overwrite_cache + self.seed = seed + self.local_rank = local_rank + self.fp16 = fp16 + self.fp16_opt_level = fp16_opt_level + self.server_ip = server_ip + self.server_port = server_port + + # Setup distant debugging if needed + if self.server_ip and self.server_port: + # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script + import ptvsd + print("Waiting for debugger attach") + ptvsd.enable_attach(address=(self.server_ip, self.server_port), redirect_output=True) + ptvsd.wait_for_attach() + + # Setup CUDA, GPU & distributed training + if self.local_rank == -1 or self.no_cuda: + device = torch.device("cuda" if torch.cuda.is_available() and not self.no_cuda else "cpu") + self.n_gpu = torch.cuda.device_count() + else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs + torch.cuda.set_device(self.local_rank) + device = torch.device("cuda", self.local_rank) + torch.distributed.init_process_group(backend='nccl') + self.n_gpu = 1 + self.device = device + + # Setup logging + logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.INFO if self.local_rank in [-1, 0] else logging.WARN) + logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", + self.local_rank, device, self.n_gpu, bool(self.local_rank != -1), self.fp16) + + # Set seed + set_seed(self) + + # Load pretrained model and tokenizer + if self.local_rank not in [-1, 0]: + torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab + + self.model_type = "" + for key in MODEL_CLASSES: + if key in self.model_name.lower(): + self.model_type = key # take the first match in model types + break + 
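+        # Illustrative note (added, not part of the original commit): with
+        # model_name = 'bert-base-uncased', the loop above sets
+        # self.model_type = 'bert', since 'bert' is the first key of
+        # MODEL_CLASSES contained in the model name.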
config_class, model_class, tokenizer_class = MODEL_CLASSES[self.model_type] + config = config_class.from_pretrained(self.config_name if self.config_name else self.model_name) + tokenizer = tokenizer_class.from_pretrained(self.tokenizer_name if self.tokenizer_name else self.model_name, do_lower_case=self.do_lower_case) + model = model_class.from_pretrained(self.model_name, from_tf=bool('.ckpt' in self.model_name), config=config) + if self.local_rank == 0: + torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained() - if args.local_rank == -1 or torch.distributed.get_rank() == 0: - # Create output directory if needed - if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: - os.makedirs(args.output_dir) - - logger.info("Saving model checkpoint to %s", args.output_dir) - # Save a trained model, configuration and tokenizer using `save_pretrained()`. - # They can then be reloaded using `from_pretrained()` - model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training - model_to_save.save_pretrained(args.output_dir) - tokenizer.save_pretrained(args.output_dir) - - # Good practice: save your training arguments together with the trained model - torch.save(args, os.path.join(args.output_dir, 'training_args.bin')) - - # Load a trained model and vocabulary that you have fine-tuned - model = model_class.from_pretrained(args.output_dir) - tokenizer = tokenizer_class.from_pretrained(args.output_dir) - model.to(args.device) - - - # Evaluation - results = {} - if args.do_eval and args.local_rank in [-1, 0]: - checkpoints = [args.output_dir] - if args.eval_all_checkpoints: - checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True))) - logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN) # Reduce logging - logger.info("Evaluate the following checkpoints: %s", checkpoints) - for checkpoint in checkpoints: - global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else "" - model = model_class.from_pretrained(checkpoint) - model.to(args.device) - result = evaluate(args, model, tokenizer, prefix=global_step) - result = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in result.items()) - results.update(result) - logger.info("Results: {}".format(results)) - return results + # Distributed and parrallel training + model.to(self.device) + if self.local_rank != -1: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[self.local_rank], + output_device=self.local_rank, + find_unused_parameters=True) + elif self.n_gpu > 1: + model = torch.nn.DataParallel(model) + logger.info("Training/evaluation parameters %s", self) + + def fit(self, X, y=None): + + if os.path.exists(self.output_dir) and os.listdir(self.output_dir) and not self.overwrite_output_dir: + raise ValueError("Output directory ({}) already exists and is not empty. 
Use --overwrite_output_dir to overcome.".format(self.output_dir)) + + train_dataset = load_and_cache_examples(self, tokenizer, evaluate=False, output_examples=False) + global_step, tr_loss = train(self, train_dataset, model, tokenizer) + logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) -if __name__ == "__main__": - main() + # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained() + if self.local_rank == -1 or torch.distributed.get_rank() == 0: + # Create output directory if needed + if not os.path.exists(self.output_dir) and self.local_rank in [-1, 0]: + os.makedirs(self.output_dir) + + logger.info("Saving model checkpoint to %s", self.output_dir) + # Save a trained model, configuration and tokenizer using `save_pretrained()`. + # They can then be reloaded using `from_pretrained()` + model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training + model_to_save.save_pretrained(self.output_dir) + tokenizer.save_pretrained(self.output_dir) + + # Good practice: save your training arguments together with the trained model + torch.save(self, os.path.join(self.output_dir, 'training_args.bin')) + + # Load a trained model and vocabulary that you have fine-tuned + model = model_class.from_pretrained(self.output_dir) + tokenizer = tokenizer_class.from_pretrained(self.output_dir) + model.to(self.device) + + return self + + def predict(self, X): + + results = {} + if self.do_eval and self.local_rank in [-1, 0]: + checkpoints = [self.output_dir] + if self.eval_all_checkpoints: + checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(self.output_dir + '/**/' + WEIGHTS_NAME, recursive=True))) + logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN) # Reduce logging + logger.info("Evaluate the following checkpoints: %s", checkpoints) + for checkpoint in checkpoints: + global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else "" + model = model_class.from_pretrained(checkpoint) + model.to(self.device) + result = evaluate(self, model, tokenizer, prefix=global_step) + result = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in result.items()) + results.update(result) + logger.info("Results: {}".format(results)) + return results From 211ca68f4f3472c23ac1fdb4a64e0b8077e43a58 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Mon, 15 Jul 2019 16:37:33 +0200 Subject: [PATCH 07/43] replace args from parser by class parameters --- cdqa/reader/reader_sklearn.py | 37 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index 03e3f729..2a7ed0d0 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -326,8 +326,6 @@ def __init__(self, self.max_seq_length = max_seq_length self.doc_stride = doc_stride self.max_query_length = max_query_length - self.do_train = do_train - self.do_eval = do_eval self.evaluate_during_training = evaluate_during_training self.do_lower_case = do_lower_case self.per_gpu_train_batch_size = per_gpu_train_batch_size @@ -396,30 +394,36 @@ def __init__(self, break config_class, model_class, tokenizer_class = MODEL_CLASSES[self.model_type] config = config_class.from_pretrained(self.config_name if self.config_name else self.model_name) - tokenizer = tokenizer_class.from_pretrained(self.tokenizer_name if self.tokenizer_name else 
self.model_name, do_lower_case=self.do_lower_case) - model = model_class.from_pretrained(self.model_name, from_tf=bool('.ckpt' in self.model_name), config=config) + self.tokenizer = tokenizer_class.from_pretrained(self.tokenizer_name if self.tokenizer_name else self.model_name, do_lower_case=self.do_lower_case) + self.model = model_class.from_pretrained(self.model_name, from_tf=bool('.ckpt' in self.model_name), config=config) if self.local_rank == 0: torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab # Distributed and parrallel training - model.to(self.device) + self.model.to(self.device) if self.local_rank != -1: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[self.local_rank], + self.model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[self.local_rank], output_device=self.local_rank, find_unused_parameters=True) elif self.n_gpu > 1: - model = torch.nn.DataParallel(model) + self.model = torch.nn.DataParallel(model) logger.info("Training/evaluation parameters %s", self) + if pretrained_model_path: + # Load a trained model and vocabulary that you have fine-tuned + self.model = model_class.from_pretrained(self.pretrained_model_path) + self.tokenizer = tokenizer_class.from_pretrained(self.pretrained_model_path) + self.model.to(self.device) + def fit(self, X, y=None): if os.path.exists(self.output_dir) and os.listdir(self.output_dir) and not self.overwrite_output_dir: raise ValueError("Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(self.output_dir)) train_dataset = load_and_cache_examples(self, tokenizer, evaluate=False, output_examples=False) - global_step, tr_loss = train(self, train_dataset, model, tokenizer) + global_step, tr_loss = train(self, train_dataset, self.model, self.tokenizer) logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained() @@ -431,24 +435,19 @@ def fit(self, X, y=None): logger.info("Saving model checkpoint to %s", self.output_dir) # Save a trained model, configuration and tokenizer using `save_pretrained()`. 
# They can then be reloaded using `from_pretrained()` - model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training + model_to_save = self.model.module if hasattr(self.model, 'module') else self.model # Take care of distributed/parallel training model_to_save.save_pretrained(self.output_dir) - tokenizer.save_pretrained(self.output_dir) + self.tokenizer.save_pretrained(self.output_dir) # Good practice: save your training arguments together with the trained model - torch.save(self, os.path.join(self.output_dir, 'training_args.bin')) - - # Load a trained model and vocabulary that you have fine-tuned - model = model_class.from_pretrained(self.output_dir) - tokenizer = tokenizer_class.from_pretrained(self.output_dir) - model.to(self.device) + torch.save(self.get_params(), os.path.join(self.output_dir, 'training_args.bin')) return self def predict(self, X): results = {} - if self.do_eval and self.local_rank in [-1, 0]: + if self.local_rank in [-1, 0]: checkpoints = [self.output_dir] if self.eval_all_checkpoints: checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(self.output_dir + '/**/' + WEIGHTS_NAME, recursive=True))) @@ -456,8 +455,8 @@ def predict(self, X): logger.info("Evaluate the following checkpoints: %s", checkpoints) for checkpoint in checkpoints: global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else "" - model = model_class.from_pretrained(checkpoint) - model.to(self.device) + self.model = model_class.from_pretrained(checkpoint) + self.model.to(self.device) result = evaluate(self, model, tokenizer, prefix=global_step) result = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in result.items()) results.update(result) From eb8ee3f3459a7a9187881b71dc407c8eebea1b02 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Mon, 15 Jul 2019 17:02:16 +0200 Subject: [PATCH 08/43] fix indent error --- cdqa/reader/reader_sklearn.py | 122 +++++++++++++++++----------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index 2a7ed0d0..a90e4cf6 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -354,68 +354,68 @@ def __init__(self, self.server_ip = server_ip self.server_port = server_port - # Setup distant debugging if needed - if self.server_ip and self.server_port: - # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script - import ptvsd - print("Waiting for debugger attach") - ptvsd.enable_attach(address=(self.server_ip, self.server_port), redirect_output=True) - ptvsd.wait_for_attach() - - # Setup CUDA, GPU & distributed training - if self.local_rank == -1 or self.no_cuda: - device = torch.device("cuda" if torch.cuda.is_available() and not self.no_cuda else "cpu") - self.n_gpu = torch.cuda.device_count() - else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs - torch.cuda.set_device(self.local_rank) - device = torch.device("cuda", self.local_rank) - torch.distributed.init_process_group(backend='nccl') - self.n_gpu = 1 - self.device = device - - # Setup logging - logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.INFO if self.local_rank in [-1, 0] else logging.WARN) - logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", - 
self.local_rank, device, self.n_gpu, bool(self.local_rank != -1), self.fp16) - - # Set seed - set_seed(self) - - # Load pretrained model and tokenizer - if self.local_rank not in [-1, 0]: - torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - - self.model_type = "" - for key in MODEL_CLASSES: - if key in self.model_name.lower(): - self.model_type = key # take the first match in model types - break - config_class, model_class, tokenizer_class = MODEL_CLASSES[self.model_type] - config = config_class.from_pretrained(self.config_name if self.config_name else self.model_name) - self.tokenizer = tokenizer_class.from_pretrained(self.tokenizer_name if self.tokenizer_name else self.model_name, do_lower_case=self.do_lower_case) - self.model = model_class.from_pretrained(self.model_name, from_tf=bool('.ckpt' in self.model_name), config=config) - - if self.local_rank == 0: - torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - - # Distributed and parrallel training - self.model.to(self.device) - if self.local_rank != -1: - self.model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[self.local_rank], - output_device=self.local_rank, - find_unused_parameters=True) - elif self.n_gpu > 1: - self.model = torch.nn.DataParallel(model) - - logger.info("Training/evaluation parameters %s", self) - - if pretrained_model_path: - # Load a trained model and vocabulary that you have fine-tuned - self.model = model_class.from_pretrained(self.pretrained_model_path) - self.tokenizer = tokenizer_class.from_pretrained(self.pretrained_model_path) + # Setup distant debugging if needed + if self.server_ip and self.server_port: + # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script + import ptvsd + print("Waiting for debugger attach") + ptvsd.enable_attach(address=(self.server_ip, self.server_port), redirect_output=True) + ptvsd.wait_for_attach() + + # Setup CUDA, GPU & distributed training + if self.local_rank == -1 or self.no_cuda: + device = torch.device("cuda" if torch.cuda.is_available() and not self.no_cuda else "cpu") + self.n_gpu = torch.cuda.device_count() + else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs + torch.cuda.set_device(self.local_rank) + device = torch.device("cuda", self.local_rank) + torch.distributed.init_process_group(backend='nccl') + self.n_gpu = 1 + self.device = device + + # Setup logging + logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.INFO if self.local_rank in [-1, 0] else logging.WARN) + logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", + self.local_rank, device, self.n_gpu, bool(self.local_rank != -1), self.fp16) + + # Set seed + set_seed(self) + + # Load pretrained model and tokenizer + if self.local_rank not in [-1, 0]: + torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab + + self.model_type = "" + for key in MODEL_CLASSES: + if key in self.model_name.lower(): + self.model_type = key # take the first match in model types + break + config_class, model_class, tokenizer_class = MODEL_CLASSES[self.model_type] + config = config_class.from_pretrained(self.config_name if self.config_name else self.model_name) + self.tokenizer = 
tokenizer_class.from_pretrained(self.tokenizer_name if self.tokenizer_name else self.model_name, do_lower_case=self.do_lower_case) + self.model = model_class.from_pretrained(self.model_name, from_tf=bool('.ckpt' in self.model_name), config=config) + + if self.local_rank == 0: + torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab + + # Distributed and parrallel training self.model.to(self.device) + if self.local_rank != -1: + self.model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[self.local_rank], + output_device=self.local_rank, + find_unused_parameters=True) + elif self.n_gpu > 1: + self.model = torch.nn.DataParallel(model) + + logger.info("Training/evaluation parameters %s", self) + + if pretrained_model_path: + # Load a trained model and vocabulary that you have fine-tuned + self.model = model_class.from_pretrained(self.pretrained_model_path) + self.tokenizer = tokenizer_class.from_pretrained(self.pretrained_model_path) + self.model.to(self.device) def fit(self, X, y=None): From a2ab7b500dc24830c093adb8fe5da230625f985d Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Mon, 15 Jul 2019 17:20:56 +0200 Subject: [PATCH 09/43] little fixes --- cdqa/reader/reader_sklearn.py | 30 +-- cdqa/reader/utils_squad_evaluate.py | 289 ++++++++++++++++++++++++++++ 2 files changed, 308 insertions(+), 11 deletions(-) create mode 100644 cdqa/reader/utils_squad_evaluate.py diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index a90e4cf6..8bef7389 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -45,6 +45,8 @@ from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad +from sklearn.base import BaseEstimator + logger = logging.getLogger(__name__) ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) \ @@ -312,7 +314,8 @@ def __init__(self, fp16=True, fp16_opt_level='O1', server_ip='', - server_port=''): + server_port='', + pretrained_model_path=None): self.train_file = train_file self.predict_file = predict_file @@ -353,6 +356,7 @@ def __init__(self, self.fp16_opt_level = fp16_opt_level self.server_ip = server_ip self.server_port = server_port + self.pretrained_model_path = pretrained_model_path # Setup distant debugging if needed if self.server_ip and self.server_port: @@ -392,10 +396,10 @@ def __init__(self, if key in self.model_name.lower(): self.model_type = key # take the first match in model types break - config_class, model_class, tokenizer_class = MODEL_CLASSES[self.model_type] + config_class, self.model_class, tokenizer_class = MODEL_CLASSES[self.model_type] config = config_class.from_pretrained(self.config_name if self.config_name else self.model_name) self.tokenizer = tokenizer_class.from_pretrained(self.tokenizer_name if self.tokenizer_name else self.model_name, do_lower_case=self.do_lower_case) - self.model = model_class.from_pretrained(self.model_name, from_tf=bool('.ckpt' in self.model_name), config=config) + self.model = self.model_class.from_pretrained(self.model_name, from_tf=bool('.ckpt' in self.model_name), config=config) if self.local_rank == 0: torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab @@ -403,17 +407,17 @@ def __init__(self, # Distributed and parrallel training self.model.to(self.device) if self.local_rank != -1: - self.model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[self.local_rank], + 
self.model = torch.nn.parallel.DistributedDataParallel(self.model, device_ids=[self.local_rank], output_device=self.local_rank, find_unused_parameters=True) elif self.n_gpu > 1: - self.model = torch.nn.DataParallel(model) + self.model = torch.nn.DataParallel(self.model) logger.info("Training/evaluation parameters %s", self) - if pretrained_model_path: + if self.pretrained_model_path: # Load a trained model and vocabulary that you have fine-tuned - self.model = model_class.from_pretrained(self.pretrained_model_path) + self.model = self.model_class.from_pretrained(self.pretrained_model_path) self.tokenizer = tokenizer_class.from_pretrained(self.pretrained_model_path) self.model.to(self.device) @@ -422,7 +426,7 @@ def fit(self, X, y=None): if os.path.exists(self.output_dir) and os.listdir(self.output_dir) and not self.overwrite_output_dir: raise ValueError("Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(self.output_dir)) - train_dataset = load_and_cache_examples(self, tokenizer, evaluate=False, output_examples=False) + train_dataset = load_and_cache_examples(self, self.tokenizer, evaluate=False, output_examples=False) global_step, tr_loss = train(self, train_dataset, self.model, self.tokenizer) logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) @@ -444,7 +448,7 @@ def fit(self, X, y=None): return self - def predict(self, X): + def evaluate(self, X): results = {} if self.local_rank in [-1, 0]: @@ -455,10 +459,14 @@ def predict(self, X): logger.info("Evaluate the following checkpoints: %s", checkpoints) for checkpoint in checkpoints: global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else "" - self.model = model_class.from_pretrained(checkpoint) + self.model = self.model_class.from_pretrained(checkpoint) self.model.to(self.device) - result = evaluate(self, model, tokenizer, prefix=global_step) + result = evaluate(self, self.model, self.tokenizer, prefix=global_step) result = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in result.items()) results.update(result) logger.info("Results: {}".format(results)) return results + + def predict(self, X): + + return '' diff --git a/cdqa/reader/utils_squad_evaluate.py b/cdqa/reader/utils_squad_evaluate.py new file mode 100644 index 00000000..d0cf643f --- /dev/null +++ b/cdqa/reader/utils_squad_evaluate.py @@ -0,0 +1,289 @@ +"""Official evaluation script for SQuAD version 2.0. + +In addition to basic functionality, we also compute additional statistics and +plot precision-recall curves if an additional na_prob.json file is provided. +This file is expected to map question ID's to the model's predicted probability +that a question is unanswerable. 
+""" +import argparse +import collections +import json +import numpy as np +import os +import re +import string +import sys + +class EVAL_OPTS(): + def __init__(self, data_file, pred_file, out_file="", + na_prob_file="na_prob.json", na_prob_thresh=1.0, + out_image_dir=None, verbose=False): + self.data_file = data_file + self.pred_file = pred_file + self.out_file = out_file + self.na_prob_file = na_prob_file + self.na_prob_thresh = na_prob_thresh + self.out_image_dir = out_image_dir + self.verbose = verbose + +OPTS = None + +def parse_args(): + parser = argparse.ArgumentParser('Official evaluation script for SQuAD version 2.0.') + parser.add_argument('data_file', metavar='data.json', help='Input data JSON file.') + parser.add_argument('pred_file', metavar='pred.json', help='Model predictions.') + parser.add_argument('--out-file', '-o', metavar='eval.json', + help='Write accuracy metrics to file (default is stdout).') + parser.add_argument('--na-prob-file', '-n', metavar='na_prob.json', + help='Model estimates of probability of no answer.') + parser.add_argument('--na-prob-thresh', '-t', type=float, default=1.0, + help='Predict "" if no-answer probability exceeds this (default = 1.0).') + parser.add_argument('--out-image-dir', '-p', metavar='out_images', default=None, + help='Save precision-recall curves to directory.') + parser.add_argument('--verbose', '-v', action='store_true') + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + return parser.parse_args() + +def make_qid_to_has_ans(dataset): + qid_to_has_ans = {} + for article in dataset: + for p in article['paragraphs']: + for qa in p['qas']: + qid_to_has_ans[qa['id']] = bool(qa['answers']) + return qid_to_has_ans + +def normalize_answer(s): + """Lower text and remove punctuation, articles and extra whitespace.""" + def remove_articles(text): + regex = re.compile(r'\b(a|an|the)\b', re.UNICODE) + return re.sub(regex, ' ', text) + def white_space_fix(text): + return ' '.join(text.split()) + def remove_punc(text): + exclude = set(string.punctuation) + return ''.join(ch for ch in text if ch not in exclude) + def lower(text): + return text.lower() + return white_space_fix(remove_articles(remove_punc(lower(s)))) + +def get_tokens(s): + if not s: return [] + return normalize_answer(s).split() + +def compute_exact(a_gold, a_pred): + return int(normalize_answer(a_gold) == normalize_answer(a_pred)) + +def compute_f1(a_gold, a_pred): + gold_toks = get_tokens(a_gold) + pred_toks = get_tokens(a_pred) + common = collections.Counter(gold_toks) & collections.Counter(pred_toks) + num_same = sum(common.values()) + if len(gold_toks) == 0 or len(pred_toks) == 0: + # If either is no-answer, then F1 is 1 if they agree, 0 otherwise + return int(gold_toks == pred_toks) + if num_same == 0: + return 0 + precision = 1.0 * num_same / len(pred_toks) + recall = 1.0 * num_same / len(gold_toks) + f1 = (2 * precision * recall) / (precision + recall) + return f1 + +def get_raw_scores(dataset, preds): + exact_scores = {} + f1_scores = {} + for article in dataset: + for p in article['paragraphs']: + for qa in p['qas']: + qid = qa['id'] + gold_answers = [a['text'] for a in qa['answers'] + if normalize_answer(a['text'])] + if not gold_answers: + # For unanswerable questions, only correct answer is empty string + gold_answers = [''] + if qid not in preds: + print('Missing prediction for %s' % qid) + continue + a_pred = preds[qid] + # Take max over all gold answers + exact_scores[qid] = max(compute_exact(a, a_pred) for a in gold_answers) + f1_scores[qid] = 
max(compute_f1(a, a_pred) for a in gold_answers) + return exact_scores, f1_scores + +def apply_no_ans_threshold(scores, na_probs, qid_to_has_ans, na_prob_thresh): + new_scores = {} + for qid, s in scores.items(): + pred_na = na_probs[qid] > na_prob_thresh + if pred_na: + new_scores[qid] = float(not qid_to_has_ans[qid]) + else: + new_scores[qid] = s + return new_scores + +def make_eval_dict(exact_scores, f1_scores, qid_list=None): + if not qid_list: + total = len(exact_scores) + return collections.OrderedDict([ + ('exact', 100.0 * sum(exact_scores.values()) / total), + ('f1', 100.0 * sum(f1_scores.values()) / total), + ('total', total), + ]) + else: + total = len(qid_list) + return collections.OrderedDict([ + ('exact', 100.0 * sum(exact_scores[k] for k in qid_list) / total), + ('f1', 100.0 * sum(f1_scores[k] for k in qid_list) / total), + ('total', total), + ]) + +def merge_eval(main_eval, new_eval, prefix): + for k in new_eval: + main_eval['%s_%s' % (prefix, k)] = new_eval[k] + +def plot_pr_curve(precisions, recalls, out_image, title): + plt.step(recalls, precisions, color='b', alpha=0.2, where='post') + plt.fill_between(recalls, precisions, step='post', alpha=0.2, color='b') + plt.xlabel('Recall') + plt.ylabel('Precision') + plt.xlim([0.0, 1.05]) + plt.ylim([0.0, 1.05]) + plt.title(title) + plt.savefig(out_image) + plt.clf() + +def make_precision_recall_eval(scores, na_probs, num_true_pos, qid_to_has_ans, + out_image=None, title=None): + qid_list = sorted(na_probs, key=lambda k: na_probs[k]) + true_pos = 0.0 + cur_p = 1.0 + cur_r = 0.0 + precisions = [1.0] + recalls = [0.0] + avg_prec = 0.0 + for i, qid in enumerate(qid_list): + if qid_to_has_ans[qid]: + true_pos += scores[qid] + cur_p = true_pos / float(i+1) + cur_r = true_pos / float(num_true_pos) + if i == len(qid_list) - 1 or na_probs[qid] != na_probs[qid_list[i+1]]: + # i.e., if we can put a threshold after this point + avg_prec += cur_p * (cur_r - recalls[-1]) + precisions.append(cur_p) + recalls.append(cur_r) + if out_image: + plot_pr_curve(precisions, recalls, out_image, title) + return {'ap': 100.0 * avg_prec} + +def run_precision_recall_analysis(main_eval, exact_raw, f1_raw, na_probs, + qid_to_has_ans, out_image_dir): + if out_image_dir and not os.path.exists(out_image_dir): + os.makedirs(out_image_dir) + num_true_pos = sum(1 for v in qid_to_has_ans.values() if v) + if num_true_pos == 0: + return + pr_exact = make_precision_recall_eval( + exact_raw, na_probs, num_true_pos, qid_to_has_ans, + out_image=os.path.join(out_image_dir, 'pr_exact.png'), + title='Precision-Recall curve for Exact Match score') + pr_f1 = make_precision_recall_eval( + f1_raw, na_probs, num_true_pos, qid_to_has_ans, + out_image=os.path.join(out_image_dir, 'pr_f1.png'), + title='Precision-Recall curve for F1 score') + oracle_scores = {k: float(v) for k, v in qid_to_has_ans.items()} + pr_oracle = make_precision_recall_eval( + oracle_scores, na_probs, num_true_pos, qid_to_has_ans, + out_image=os.path.join(out_image_dir, 'pr_oracle.png'), + title='Oracle Precision-Recall curve (binary task of HasAns vs. 
NoAns)')
+  merge_eval(main_eval, pr_exact, 'pr_exact')
+  merge_eval(main_eval, pr_f1, 'pr_f1')
+  merge_eval(main_eval, pr_oracle, 'pr_oracle')
+
+def histogram_na_prob(na_probs, qid_list, image_dir, name):
+  if not qid_list:
+    return
+  x = [na_probs[k] for k in qid_list]
+  weights = np.ones_like(x) / float(len(x))
+  plt.hist(x, weights=weights, bins=20, range=(0.0, 1.0))
+  plt.xlabel('Model probability of no-answer')
+  plt.ylabel('Proportion of dataset')
+  plt.title('Histogram of no-answer probability: %s' % name)
+  plt.savefig(os.path.join(image_dir, 'na_prob_hist_%s.png' % name))
+  plt.clf()
+
+def find_best_thresh(preds, scores, na_probs, qid_to_has_ans):
+  num_no_ans = sum(1 for k in qid_to_has_ans if not qid_to_has_ans[k])
+  cur_score = num_no_ans
+  best_score = cur_score
+  best_thresh = 0.0
+  qid_list = sorted(na_probs, key=lambda k: na_probs[k])
+  for i, qid in enumerate(qid_list):
+    if qid not in scores: continue
+    if qid_to_has_ans[qid]:
+      diff = scores[qid]
+    else:
+      if preds[qid]:
+        diff = -1
+      else:
+        diff = 0
+    cur_score += diff
+    if cur_score > best_score:
+      best_score = cur_score
+      best_thresh = na_probs[qid]
+  return 100.0 * best_score / len(scores), best_thresh
+
+def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, qid_to_has_ans):
+  best_exact, exact_thresh = find_best_thresh(preds, exact_raw, na_probs, qid_to_has_ans)
+  best_f1, f1_thresh = find_best_thresh(preds, f1_raw, na_probs, qid_to_has_ans)
+  main_eval['best_exact'] = best_exact
+  main_eval['best_exact_thresh'] = exact_thresh
+  main_eval['best_f1'] = best_f1
+  main_eval['best_f1_thresh'] = f1_thresh
+
+def main(OPTS):
+  with open(OPTS.data_file) as f:
+    dataset_json = json.load(f)
+    dataset = dataset_json['data']
+  with open(OPTS.pred_file) as f:
+    preds = json.load(f)
+  if OPTS.na_prob_file:
+    with open(OPTS.na_prob_file) as f:
+      na_probs = json.load(f)
+  else:
+    na_probs = {k: 0.0 for k in preds}
+  qid_to_has_ans = make_qid_to_has_ans(dataset)  # maps qid to True/False
+  has_ans_qids = [k for k, v in qid_to_has_ans.items() if v]
+  no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
+  exact_raw, f1_raw = get_raw_scores(dataset, preds)
+  exact_thresh = apply_no_ans_threshold(exact_raw, na_probs, qid_to_has_ans,
+                                        OPTS.na_prob_thresh)
+  f1_thresh = apply_no_ans_threshold(f1_raw, na_probs, qid_to_has_ans,
+                                     OPTS.na_prob_thresh)
+  out_eval = make_eval_dict(exact_thresh, f1_thresh)
+  if has_ans_qids:
+    has_ans_eval = make_eval_dict(exact_thresh, f1_thresh, qid_list=has_ans_qids)
+    merge_eval(out_eval, has_ans_eval, 'HasAns')
+  if no_ans_qids:
+    no_ans_eval = make_eval_dict(exact_thresh, f1_thresh, qid_list=no_ans_qids)
+    merge_eval(out_eval, no_ans_eval, 'NoAns')
+  if OPTS.na_prob_file:
+    find_all_best_thresh(out_eval, preds, exact_raw, f1_raw, na_probs, qid_to_has_ans)
+  if OPTS.na_prob_file and OPTS.out_image_dir:
+    run_precision_recall_analysis(out_eval, exact_raw, f1_raw, na_probs,
+                                  qid_to_has_ans, OPTS.out_image_dir)
+    histogram_na_prob(na_probs, has_ans_qids, OPTS.out_image_dir, 'hasAns')
+    histogram_na_prob(na_probs, no_ans_qids, OPTS.out_image_dir, 'noAns')
+  if OPTS.out_file:
+    with open(OPTS.out_file, 'w') as f:
+      json.dump(out_eval, f)
+  else:
+    print(json.dumps(out_eval, indent=2))
+  return out_eval
+
+if __name__ == '__main__':
+  OPTS = parse_args()
+  if OPTS.out_image_dir:
+    import matplotlib
+    matplotlib.use('Agg')
+    import matplotlib.pyplot as plt
+  main(OPTS)
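
For intuition about the metric implemented above: compute_f1 scores bag-of-tokens overlap after normalize_answer has lowercased the text and stripped punctuation and articles. A quick worked check, using made-up strings rather than data from any real SQuAD file:

from utils_squad_evaluate import compute_exact, compute_f1

# "The cat sat." normalizes to ['cat', 'sat']; "a cat sat down" to ['cat', 'sat', 'down'].
# The overlap is 2 tokens, so precision = 2/3, recall = 2/2 = 1.0,
# and F1 = 2 * (2/3 * 1.0) / (2/3 + 1.0) = 0.8.
print(compute_exact("The cat sat.", "a cat sat down"))  # 0
print(compute_f1("The cat sat.", "a cat sat down"))     # 0.8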
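
The reader code in this series does not shell out to this script: it builds EVAL_OPTS directly and calls main, imported as evaluate_on_squad, on the prediction files written during evaluation. A minimal sketch of that programmatic path, assuming dev-v2.0.json and the predictions_.json / null_odds_.json files that evaluate() writes with an empty prefix are already present in the working directory:

from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad

opts = EVAL_OPTS(data_file='dev-v2.0.json',       # gold SQuAD 2.0 annotations
                 pred_file='predictions_.json',   # predicted answer text keyed by question id
                 na_prob_file='null_odds_.json')  # no-answer scores, same keys
results = evaluate_on_squad(opts)  # returns (and, since out_file is empty, prints) the metrics dict
print(results['exact'], results['f1'], results['best_f1_thresh'])
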
From 1318d25556b94cda0edaaf269591d7677c23f713 Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Mon, 15 Jul 2019 17:24:52 +0200
Subject: [PATCH 10/43] add notebook for XLNet training on SQuAD

---
 examples/tutorial-train-xlnet-squad.ipynb | 367 ++++++++++++++++++++++
 1 file changed, 367 insertions(+)
 create mode 100644 examples/tutorial-train-xlnet-squad.ipynb

diff --git a/examples/tutorial-train-xlnet-squad.ipynb b/examples/tutorial-train-xlnet-squad.ipynb
new file mode 100644
index 00000000..e1c54a9c
--- /dev/null
+++ b/examples/tutorial-train-xlnet-squad.ipynb
@@ -0,0 +1,367 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "tutorial-predict-pipeline.ipynb",
+      "version": "0.3.2",
+      "provenance": []
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.3"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "zNtCqwveFjcK",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 153
+        },
+        "outputId": "03a577cd-62e2-405e-aa3e-a35314400191"
+      },
+      "source": [
+        "!git clone https://github.com/cdqa-suite/cdQA.git"
+      ],
+      "execution_count": 1,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Cloning into 'cdQA'...\n",
+            "remote: Enumerating objects: 45, done.\n",
+            "remote: Counting objects: 100% (45/45), done.\n",
+            "remote: Compressing objects: 100% (35/35), done.\n",
+            "remote: Total 841 (delta 22), reused 25 (delta 10), pack-reused 796\n",
+            "Receiving objects: 100% (841/841), 266.80 KiB | 11.12 MiB/s, done.\n",
+            "Resolving deltas: 100% (489/489), done.\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "v2XvXm4bFp7h",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "import os\n",
+        "cwd = os.getcwd()\n",
+        "os.chdir(\"cdQA\")"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "i8-WoI4eFw_E",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 366
+        },
+        "outputId": "4a564950-1d3f-4627-8fa2-63c61fa579de"
+      },
+      "source": [
+        "!ls -la"
+      ],
+      "execution_count": 3,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "total 84\n",
+            "drwxr-xr-x 7 root root 4096 Jul 15 15:04 .\n",
+            "drwxr-xr-x 1 root root 4096 Jul 15 15:04 ..\n",
+            "-rw-r--r-- 1 root root 815 Jul 15 15:04 api.py\n",
+            "-rw-r--r-- 1 root root 22 Jul 15 15:04 apt.txt\n",
+            "drwxr-xr-x 6 root root 4096 Jul 15 15:04 cdqa\n",
+            "-rw-r--r-- 1 root root 1452 Jul 15 15:04 download.py\n",
+            "drwxr-xr-x 2 root root 4096 Jul 15 15:04 examples\n",
+            "drwxr-xr-x 8 root root 4096 Jul 15 15:04 .git\n",
+            "drwxr-xr-x 3 root root 4096 Jul 15 15:04 .github\n",
+            "-rw-r--r-- 1 root root 1375 Jul 15 15:04 .gitignore\n",
+            "-rw-r--r-- 1 root root 11356 Jul 15 15:04 LICENSE\n",
+            "-rw-r--r-- 1 root root 40 Jul 15 15:04 MANIFEST.in\n",
+            "-rw-r--r-- 1 root root 12184 Jul 15 15:04 README.md\n",
+            "-rw-r--r-- 1 root root 95 Jul 15 15:04 requirements.txt\n",
+            "-rw-r--r-- 1 root root 727 Jul 15 15:04 setup.py\n",
+            "drwxr-xr-x 2 root root 4096 Jul 15 15:04 tests\n",
+            "-rw-r--r-- 1 root root 312 Jul 15 15:04 .travis.yml\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "5jBtSKczGF38",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 56
+        },
+        "outputId": "d6cd8cdc-94dd-432c-9df9-7ac93b25c5f0"
+      },
+      "source": [
+        "!git checkout sync-huggingface"
+      ],
+      "execution_count": 4,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Branch 'sync-huggingface' set up to track remote branch 'sync-huggingface' from 'origin'.\n",
+            "Switched to a new branch 'sync-huggingface'\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "DHl2HUX1GRd6",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "outputId": "7dd4a79c-827a-4491-dc6b-7ab369dfbf3e"
+      },
+      "source": [
+        "!pip install -e ."
+ ], + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Obtaining file:///content/cdQA\n", + "Requirement already satisfied: Flask in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (1.1.1)\n", + "Collecting flask_cors (from cdqa==1.0.3)\n", + " Downloading https://files.pythonhosted.org/packages/78/38/e68b11daa5d613e3a91e4bf3da76c94ac9ee0d9cd515af9c1ab80d36f709/Flask_Cors-3.0.8-py2.py3-none-any.whl\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.13.2)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.24.2)\n", + "Requirement already satisfied: prettytable in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.7.2)\n", + "Collecting pytorch_pretrained_bert (from cdqa==1.0.3)\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d7/e0/c08d5553b89973d9a240605b9c12404bcf8227590de62bae27acbcfe076b/pytorch_pretrained_bert-0.6.2-py3-none-any.whl (123kB)\n", + "\u001b[K |████████████████████████████████| 133kB 4.1MB/s \n", + "\u001b[?25hRequirement already satisfied: scikit_learn in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.21.2)\n", + "Collecting tika (from cdqa==1.0.3)\n", + " Downloading https://files.pythonhosted.org/packages/10/75/b566e446ffcf292f10c8d84c15a3d91615fe3d7ca8072a17c949d4e84b66/tika-1.19.tar.gz\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (4.28.1)\n", + "Collecting wget (from cdqa==1.0.3)\n", + " Downloading https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip\n", + "Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (2.10.1)\n", + "Requirement already satisfied: click>=5.1 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (7.0)\n", + "Requirement already satisfied: Werkzeug>=0.15 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (0.15.4)\n", + "Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (1.1.0)\n", + "Requirement already satisfied: Six in /usr/local/lib/python3.6/dist-packages (from flask_cors->cdqa==1.0.3) (1.12.0)\n", + "Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (2018.9)\n", + "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (1.16.4)\n", + "Requirement already satisfied: python-dateutil>=2.5.0 in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (2.5.3)\n", + "Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.9.185)\n", + "Collecting regex (from pytorch_pretrained_bert->cdqa==1.0.3)\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/6f/4e/1b178c38c9a1a184288f72065a65ca01f3154df43c6ad898624149b8b4e0/regex-2019.06.08.tar.gz (651kB)\n", + "\u001b[K |████████████████████████████████| 655kB 8.9MB/s \n", + "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (2.21.0)\n", + "Requirement already satisfied: torch>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.1.0)\n", + "Requirement already satisfied: 
scipy>=0.17.0 in /usr/local/lib/python3.6/dist-packages (from scikit_learn->cdqa==1.0.3) (1.3.0)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from tika->cdqa==1.0.3) (41.0.1)\n", + "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->Flask->cdqa==1.0.3) (1.1.1)\n", + "Requirement already satisfied: s3transfer<0.3.0,>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.2.1)\n", + "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.9.4)\n", + "Requirement already satisfied: botocore<1.13.0,>=1.12.185 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (1.12.185)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (3.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2019.6.16)\n", + "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (1.24.3)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2.8)\n", + "Requirement already satisfied: docutils>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.13.0,>=1.12.185->boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.14)\n", + "Building wheels for collected packages: tika, wget, regex\n", + " Building wheel for tika (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Stored in directory: /root/.cache/pip/wheels/b4/db/8a/3a3f0c0725448eaa92703e3dda71e29dc13a119ff6c1036848\n", + " Building wheel for wget (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Stored in directory: /root/.cache/pip/wheels/40/15/30/7d8f7cea2902b4db79e3fea550d7d7b85ecb27ef992b618f3f\n", + " Building wheel for regex (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n",
+            "  Stored in directory: /root/.cache/pip/wheels/35/e4/80/abf3b33ba89cf65cd262af8a22a5a999cc28fbfabea6b38473\n",
+            "Successfully built tika wget regex\n",
+            "Installing collected packages: flask-cors, regex, pytorch-pretrained-bert, tika, wget, cdqa\n",
+            "  Running setup.py develop for cdqa\n",
+            "Successfully installed cdqa flask-cors-3.0.8 pytorch-pretrained-bert-0.6.2 regex-2019.6.8 tika-1.19 wget-3.2\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "_NWD3P6qH_8_",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "import wget\n",
+        "\n",
+        "squad_urls = [\n",
+        "    'https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json',\n",
+        "    'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json',\n",
+        "    'https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json',\n",
+        "    'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json'\n",
+        "]\n",
+        "\n",
+        "for squad_url in squad_urls:\n",
+        "    wget.download(url=squad_url, out='.')"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "50r4anYBITRO",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 94
+        },
+        "outputId": "c4caa2d3-a1b1-4bd5-dece-36153bddf7d6"
+      },
+      "source": [
+        "!ls"
+      ],
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "api.py\t dev-v1.1.json LICENSE\t\tsetup.py\n",
+            "apt.txt dev-v2.0.json MANIFEST.in\ttests\n",
+            "cdqa\t download.py README.md\t\ttrain-v1.1.json\n",
+            "cdqa.egg-info examples requirements.txt\ttrain-v2.0.json\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "oXW27LR3KvrT",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 191
+        },
+        "outputId": "d9f17f5d-bff0-40ae-880c-4f86fa75daec"
+      },
+      "source": [
+        "!pip install tensorboardX"
+      ],
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Collecting tensorboardX\n",
+            "  Downloading https://files.pythonhosted.org/packages/c3/12/dcaf67e1312475b26db9e45e7bb6f32b540671a9ee120b3a72d9e09bc517/tensorboardX-1.8-py2.py3-none-any.whl (216kB)\n",
+            "Requirement already satisfied: protobuf>=3.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorboardX) (3.7.1)\n",
+            "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from tensorboardX) (1.16.4)\n",
+            "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from tensorboardX) (1.12.0)\n",
+            "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.2.0->tensorboardX) (41.0.1)\n",
+            "Installing collected packages: tensorboardX\n",
+            "Successfully installed tensorboardX-1.8\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "ExecuteTime": {
+          "end_time": "2019-06-25T14:21:08.091797Z",
+          "start_time": "2019-06-25T14:21:03.027877Z"
+        },
+        "id": "umJkmO9HFf3L",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 548
+        },
+        "outputId": "2965eac0-1d9c-4699-f410-5437482e41c2"
+      },
+      "source": [
+        "import os\n",
+        "import torch\n",
+        "from sklearn.externals import joblib\n",
+        "from cdqa.reader.reader_sklearn import Reader\n",
+        "\n",
+        "reader = Reader(train_file='train-v2.0.json',\n",
+        "                predict_file='dev-v2.0.json',\n",
+        "                model_name='xlnet',\n",
+        "                output_dir='.')\n",
+        "\n",
+        "reader.fit()"
+      ],
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "error",
+          "ename": "ModuleNotFoundError",
+          "evalue": "ignored",
+          "traceback": [
+            "---------------------------------------------------------------------------",
+            "ModuleNotFoundError                       Traceback (most recent call last)",
+            "<ipython-input-10> in <module>()\n      2 import torch\n      3 from sklearn.externals import joblib\n----> 4 from cdqa.reader.reader_sklearn import Reader\n",
+            "/content/cdQA/cdqa/reader/reader_sklearn.py in <module>()\n     33 from tensorboardX import SummaryWriter\n     34 \n---> 35 from pytorch_transformers import (WEIGHTS_NAME, BertConfig,\n     36     BertForQuestionAnswering, BertTokenizer,\n     37     XLMConfig, XLMForQuestionAnswering,\n",
+            "ModuleNotFoundError: No module named 'pytorch_transformers'",
+            "",
+            "---------------------------------------------------------------------------\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n---------------------------------------------------------------------------\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "ExecuteTime": {
+          "end_time": "2019-06-25T14:21:26.472449Z",
+          "start_time": "2019-06-25T14:21:11.427052Z"
+        },
+        "id": "NIwNr1DdFf3X",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# prediction = reader.predict(X='Since when does the Excellence Program of BNP Paribas exist?')"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file

From d20e06d307eeb6ddb998ecae6f7fabd789f8a849 Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Tue, 16 Jul 2019 16:21:00 +0200
Subject: [PATCH 11/43] add pytorch-transformers to reqs

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index fb09e7b4..41c0f8d3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ joblib
 pandas
 prettytable
 pytorch_pretrained_bert
+pytorch-transformers
 scikit_learn
 tika
 tqdm
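
This one-line requirements change is the fix for the ModuleNotFoundError raised in the notebook above: pytorch-transformers is imported by reader_sklearn.py but was never declared as a dependency. In an already-provisioned notebook environment, the equivalent manual step (mirroring the tensorboardX cell of the notebook) would simply be:

!pip install pytorch-transformers
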
From 1a60319462423e6c607307cfad9a73f93dcb157c Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Tue, 16 Jul 2019 16:48:11 +0200
Subject: [PATCH 12/43] sync with pytorch-transformers 1.0

---
 cdqa/reader/hf_original_examples/run_squad.py | 119 ++++++++++++------
 cdqa/reader/reader_sklearn.py                 | 111 +++++++++++-----
 2 files changed, 158 insertions(+), 72 deletions(-)

diff --git a/cdqa/reader/hf_original_examples/run_squad.py b/cdqa/reader/hf_original_examples/run_squad.py
index af4a771f..e920ebe3 100644
--- a/cdqa/reader/hf_original_examples/run_squad.py
+++ b/cdqa/reader/hf_original_examples/run_squad.py
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" Finetuning a question-answering model (Bert, XLM, XLNet,...) on SQuAD."""
+""" Finetuning the library models for question-answering on SQuAD (Bert, XLM, XLNet)."""
 
 from __future__ import absolute_import, division, print_function
 
@@ -21,7 +21,7 @@
 import logging
 import os
 import random
-from io import open
+import glob
 
 import numpy as np
 import torch
@@ -41,8 +41,13 @@ from pytorch_transformers import AdamW, WarmupLinearSchedule
 
-from utils_squad import read_squad_examples, convert_examples_to_features, RawResult, write_predictions
+from utils_squad import (read_squad_examples, convert_examples_to_features,
+                         RawResult, write_predictions,
+                         RawResultExtended, write_predictions_extended)
 
+# The following import is the official SQuAD evaluation script (2.0).
+# You can remove it from the dependencies if you are using this script outside of the library +# We've added it here for automated tests (see examples/test_examples.py file) from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad logger = logging.getLogger(__name__) @@ -63,6 +68,8 @@ def set_seed(args): if args.n_gpu > 0: torch.cuda.manual_seed_all(args.seed) +def to_list(tensor): + return tensor.detach().cpu().tolist() def train(args, train_dataset, model, tokenizer): """ Train the model """ @@ -115,15 +122,18 @@ def train(args, train_dataset, model, tokenizer): model.train() batch = tuple(t.to(args.device) for t in batch) inputs = {'input_ids': batch[0], - 'token_type_ids': batch[1] if args.model_type in ['bert', 'xlnet'] else None, # XLM don't use segment_ids + 'token_type_ids': None if args.model_type == 'xlm' else batch[1], # XLM don't use segment_ids 'attention_mask': batch[2], 'start_positions': batch[3], 'end_positions': batch[4]} + if args.model_type in ['xlnet', 'xlm']: + inputs.update({'cls_index': batch[5], + 'p_mask': batch[6]}) ouputs = model(**inputs) loss = ouputs[0] # model outputs are always tuple in pytorch-transformers (see doc) if args.n_gpu > 1: - loss = loss.mean() # mean() to average on multi-gpu parallel training + loss = loss.mean() # mean() to average on multi-gpu parallel (not distributed) training if args.gradient_accumulation_steps > 1: loss = loss / args.gradient_accumulation_steps @@ -169,6 +179,9 @@ def train(args, train_dataset, model, tokenizer): train_iterator.close() break + if args.local_rank in [-1, 0]: + tb_writer.close() + return global_step, tr_loss / global_step @@ -191,33 +204,52 @@ def evaluate(args, model, tokenizer, prefix=""): for batch in tqdm(eval_dataloader, desc="Evaluating"): model.eval() batch = tuple(t.to(args.device) for t in batch) - example_indices = batch[3] with torch.no_grad(): inputs = {'input_ids': batch[0], - 'token_type_ids': batch[1] if args.model_type in ['bert', 'xlnet'] else None, # XLM don't use segment_ids - 'attention_mask': batch[2]} + 'token_type_ids': None if args.model_type == 'xlm' else batch[1], # XLM don't use segment_ids + 'attention_mask': batch[2]} + example_indices = batch[3] + if args.model_type in ['xlnet', 'xlm']: + inputs.update({'cls_index': batch[4], + 'p_mask': batch[5]}) outputs = model(**inputs) - batch_start_logits, batch_end_logits = outputs[:2] for i, example_index in enumerate(example_indices): - start_logits = batch_start_logits[i].detach().cpu().tolist() - end_logits = batch_end_logits[i].detach().cpu().tolist() eval_feature = features[example_index.item()] unique_id = int(eval_feature.unique_id) - all_results.append(RawResult(unique_id=unique_id, - start_logits=start_logits, - end_logits=end_logits)) + if args.model_type in ['xlnet', 'xlm']: + # XLNet uses a more complex post-processing procedure + result = RawResultExtended(unique_id = unique_id, + start_top_log_probs = to_list(outputs[0][i]), + start_top_index = to_list(outputs[1][i]), + end_top_log_probs = to_list(outputs[2][i]), + end_top_index = to_list(outputs[3][i]), + cls_logits = to_list(outputs[4][i])) + else: + result = RawResult(unique_id = unique_id, + start_logits = to_list(outputs[0][i]), + end_logits = to_list(outputs[1][i])) + all_results.append(result) + # Compute predictions output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix)) output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix)) output_null_log_odds_file = 
os.path.join(args.output_dir, "null_odds_{}.json".format(prefix)) - all_predictions = write_predictions(examples, features, all_results, - args.n_best_size, args.max_answer_length, - args.do_lower_case, output_prediction_file, - output_nbest_file, output_null_log_odds_file, - args.verbose_logging, args.version_2_with_negative, - args.null_score_diff_threshold) + if args.model_type in ['xlnet', 'xlm']: + # XLNet uses a more complex post-processing procedure + write_predictions_extended(examples, features, all_results, args.n_best_size, + args.max_answer_length, output_prediction_file, + output_nbest_file, output_null_log_odds_file, args.predict_file, + model.config.start_n_top, model.config.end_n_top, + args.version_2_with_negative, tokenizer, args.verbose_logging) + else: + write_predictions(examples, features, all_results, args.n_best_size, + args.max_answer_length, args.do_lower_case, output_prediction_file, + output_nbest_file, output_null_log_odds_file, args.verbose_logging, + args.version_2_with_negative, args.null_score_diff_threshold) + + # Evaluate with the official SQuAD script evaluate_options = EVAL_OPTS(data_file=args.predict_file, pred_file=output_prediction_file, na_prob_file=output_null_log_odds_file) @@ -230,7 +262,7 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal input_file = args.predict_file if evaluate else args.train_file cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format( 'dev' if evaluate else 'train', - list(filter(None, args.model_name.split('/'))).pop(), + list(filter(None, args.model_name_or_path.split('/'))).pop(), str(args.max_seq_length))) if os.path.exists(cached_features_file) and not args.overwrite_cache and not output_examples: logger.info("Loading features from cached file %s", cached_features_file) @@ -238,8 +270,8 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal else: logger.info("Creating features from dataset file at %s", input_file) examples = read_squad_examples(input_file=input_file, - is_training=not evaluate, - version_2_with_negative=args.version_2_with_negative) + is_training=not evaluate, + version_2_with_negative=args.version_2_with_negative) features = convert_examples_to_features(examples=examples, tokenizer=tokenizer, max_seq_length=args.max_seq_length, @@ -254,13 +286,18 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long) all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long) + all_cls_index = torch.tensor([f.cls_index for f in features], dtype=torch.long) + all_p_mask = torch.tensor([f.p_mask for f in features], dtype=torch.float) if evaluate: all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long) - dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index) + dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, + all_example_index, all_cls_index, all_p_mask) else: all_start_positions = torch.tensor([f.start_position for f in features], dtype=torch.long) all_end_positions = torch.tensor([f.end_position for f in features], dtype=torch.long) - dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_start_positions, all_end_positions) + dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, + 
all_start_positions, all_end_positions, + all_cls_index, all_p_mask) if output_examples: return dataset, examples, features @@ -275,8 +312,10 @@ def main(): help="SQuAD json for training. E.g., train-v1.1.json") parser.add_argument("--predict_file", default=None, type=str, required=True, help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json") - parser.add_argument("--model_name", default=None, type=str, required=True, - help="Bert/XLNet/XLM pre-trained model selected in the list: " + ", ".join(ALL_MODELS)) + parser.add_argument("--model_type", default=None, type=str, required=True, + help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys())) + parser.add_argument("--model_name_or_path", default=None, type=str, required=True, + help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS)) parser.add_argument("--output_dir", default=None, type=str, required=True, help="The output directory where the model checkpoints and predictions will be written.") @@ -401,15 +440,11 @@ def main(): if args.local_rank not in [-1, 0]: torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - args.model_type = "" - for key in MODEL_CLASSES: - if key in args.model_name.lower(): - args.model_type = key # take the first match in model types - break + args.model_type = args.model_type.lower() config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type] - config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name) - tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name, do_lower_case=args.do_lower_case) - model = model_class.from_pretrained(args.model_name, from_tf=bool('.ckpt' in args.model_name), config=config) + config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name_or_path) + tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name_or_path, do_lower_case=args.do_lower_case) + model = model_class.from_pretrained(args.model_name_or_path, from_tf=bool('.ckpt' in args.model_name_or_path), config=config) if args.local_rank == 0: torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab @@ -432,7 +467,7 @@ def main(): logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) - # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained() + # Save the trained model and the tokenizer if args.local_rank == -1 or torch.distributed.get_rank() == 0: # Create output directory if needed if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: @@ -454,22 +489,30 @@ def main(): model.to(args.device) - # Evaluation + # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory results = {} if args.do_eval and args.local_rank in [-1, 0]: checkpoints = [args.output_dir] if args.eval_all_checkpoints: checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True))) - logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN) # Reduce logging + logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN) # Reduce model loading logs + logger.info("Evaluate the following checkpoints: %s", checkpoints) + for checkpoint in checkpoints: + # Reload the model 
global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else "" model = model_class.from_pretrained(checkpoint) model.to(args.device) + + # Evaluate result = evaluate(args, model, tokenizer, prefix=global_step) + result = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in result.items()) results.update(result) + logger.info("Results: {}".format(results)) + return results diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index 8bef7389..0e605e52 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning a question-answering model (Bert, XLM, XLNet,...) on SQuAD.""" +""" Finetuning the library models for question-answering on SQuAD (Bert, XLM, XLNet).""" from __future__ import absolute_import, division, print_function @@ -21,7 +21,7 @@ import logging import os import random -from io import open +import glob import numpy as np import torch @@ -41,8 +41,13 @@ from pytorch_transformers import AdamW, WarmupLinearSchedule -from utils_squad import read_squad_examples, convert_examples_to_features, RawResult, write_predictions +from utils_squad import (read_squad_examples, convert_examples_to_features, + RawResult, write_predictions, + RawResultExtended, write_predictions_extended) +# The follwing import is the official SQuAD evaluation script (2.0). +# You can remove it from the dependencies if you are using this script outside of the library +# We've added it here for automated tests (see examples/test_examples.py file) from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad from sklearn.base import BaseEstimator @@ -65,6 +70,8 @@ def set_seed(args): if args.n_gpu > 0: torch.cuda.manual_seed_all(args.seed) +def to_list(tensor): + return tensor.detach().cpu().tolist() def train(args, train_dataset, model, tokenizer): """ Train the model """ @@ -117,15 +124,18 @@ def train(args, train_dataset, model, tokenizer): model.train() batch = tuple(t.to(args.device) for t in batch) inputs = {'input_ids': batch[0], - 'token_type_ids': batch[1] if args.model_type in ['bert', 'xlnet'] else None, # XLM don't use segment_ids + 'token_type_ids': None if args.model_type == 'xlm' else batch[1], # XLM don't use segment_ids 'attention_mask': batch[2], 'start_positions': batch[3], 'end_positions': batch[4]} + if args.model_type in ['xlnet', 'xlm']: + inputs.update({'cls_index': batch[5], + 'p_mask': batch[6]}) ouputs = model(**inputs) loss = ouputs[0] # model outputs are always tuple in pytorch-transformers (see doc) if args.n_gpu > 1: - loss = loss.mean() # mean() to average on multi-gpu parallel training + loss = loss.mean() # mean() to average on multi-gpu parallel (not distributed) training if args.gradient_accumulation_steps > 1: loss = loss / args.gradient_accumulation_steps @@ -171,6 +181,9 @@ def train(args, train_dataset, model, tokenizer): train_iterator.close() break + if args.local_rank in [-1, 0]: + tb_writer.close() + return global_step, tr_loss / global_step @@ -193,33 +206,52 @@ def evaluate(args, model, tokenizer, prefix=""): for batch in tqdm(eval_dataloader, desc="Evaluating"): model.eval() batch = tuple(t.to(args.device) for t in batch) - example_indices = batch[3] with torch.no_grad(): inputs = {'input_ids': batch[0], - 'token_type_ids': batch[1] if args.model_type in ['bert', 'xlnet'] else None, # 
XLM don't use segment_ids
+                      'token_type_ids': None if args.model_type == 'xlm' else batch[1],  # XLM doesn't use segment_ids
                       'attention_mask': batch[2]}
+            example_indices = batch[3]
+            if args.model_type in ['xlnet', 'xlm']:
+                inputs.update({'cls_index': batch[4],
+                               'p_mask':    batch[5]})
             outputs = model(**inputs)
-            batch_start_logits, batch_end_logits = outputs[:2]

         for i, example_index in enumerate(example_indices):
-            start_logits = batch_start_logits[i].detach().cpu().tolist()
-            end_logits = batch_end_logits[i].detach().cpu().tolist()
             eval_feature = features[example_index.item()]
             unique_id = int(eval_feature.unique_id)
-            all_results.append(RawResult(unique_id=unique_id,
-                                         start_logits=start_logits,
-                                         end_logits=end_logits))
-
+            if args.model_type in ['xlnet', 'xlm']:
+                # XLNet uses a more complex post-processing procedure
+                result = RawResultExtended(unique_id = unique_id,
+                                           start_top_log_probs = to_list(outputs[0][i]),
+                                           start_top_index = to_list(outputs[1][i]),
+                                           end_top_log_probs = to_list(outputs[2][i]),
+                                           end_top_index = to_list(outputs[3][i]),
+                                           cls_logits = to_list(outputs[4][i]))
+            else:
+                result = RawResult(unique_id = unique_id,
+                                   start_logits = to_list(outputs[0][i]),
+                                   end_logits = to_list(outputs[1][i]))
+            all_results.append(result)
+
+    # Compute predictions
     output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix))
     output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix))
     output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix))
-    all_predictions = write_predictions(examples, features, all_results,
-                                        args.n_best_size, args.max_answer_length,
-                                        args.do_lower_case, output_prediction_file,
-                                        output_nbest_file, output_null_log_odds_file,
-                                        args.verbose_logging, args.version_2_with_negative,
-                                        args.null_score_diff_threshold)
+    if args.model_type in ['xlnet', 'xlm']:
+        # XLNet uses a more complex post-processing procedure
+        write_predictions_extended(examples, features, all_results, args.n_best_size,
+                                   args.max_answer_length, output_prediction_file,
+                                   output_nbest_file, output_null_log_odds_file, args.predict_file,
+                                   model.config.start_n_top, model.config.end_n_top,
+                                   args.version_2_with_negative, tokenizer, args.verbose_logging)
+    else:
+        write_predictions(examples, features, all_results, args.n_best_size,
+                          args.max_answer_length, args.do_lower_case, output_prediction_file,
+                          output_nbest_file, output_null_log_odds_file, args.verbose_logging,
+                          args.version_2_with_negative, args.null_score_diff_threshold)
+
+    # Evaluate with the official SQuAD script
     evaluate_options = EVAL_OPTS(data_file=args.predict_file,
                                  pred_file=output_prediction_file,
                                  na_prob_file=output_null_log_odds_file)
@@ -232,7 +264,7 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
     input_file = args.predict_file if evaluate else args.train_file
     cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format(
         'dev' if evaluate else 'train',
-        list(filter(None, args.model_name.split('/'))).pop(),
+        list(filter(None, args.model_name_or_path.split('/'))).pop(),
         str(args.max_seq_length)))
     if os.path.exists(cached_features_file) and not args.overwrite_cache and not output_examples:
         logger.info("Loading features from cached file %s", cached_features_file)
@@ -256,13 +288,18 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
     all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_input_mask = 
torch.tensor([f.input_mask for f in features], dtype=torch.long) all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long) + all_cls_index = torch.tensor([f.cls_index for f in features], dtype=torch.long) + all_p_mask = torch.tensor([f.p_mask for f in features], dtype=torch.float) if evaluate: all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long) - dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index) + dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, + all_example_index, all_cls_index, all_p_mask) else: all_start_positions = torch.tensor([f.start_position for f in features], dtype=torch.long) all_end_positions = torch.tensor([f.end_position for f in features], dtype=torch.long) - dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_start_positions, all_end_positions) + dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, + all_start_positions, all_end_positions, + all_cls_index, all_p_mask) if output_examples: return dataset, examples, features @@ -276,7 +313,8 @@ class Reader(BaseEstimator): def __init__(self, train_file=None, predict_file=None, - model_name=None, + model_type=None, + model_name_or_path=None, output_dir=None, config_name="", tokenizer_name="", @@ -391,15 +429,11 @@ def __init__(self, if self.local_rank not in [-1, 0]: torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - self.model_type = "" - for key in MODEL_CLASSES: - if key in self.model_name.lower(): - self.model_type = key # take the first match in model types - break + self.model_type = self.model_type.lower() config_class, self.model_class, tokenizer_class = MODEL_CLASSES[self.model_type] - config = config_class.from_pretrained(self.config_name if self.config_name else self.model_name) - self.tokenizer = tokenizer_class.from_pretrained(self.tokenizer_name if self.tokenizer_name else self.model_name, do_lower_case=self.do_lower_case) - self.model = self.model_class.from_pretrained(self.model_name, from_tf=bool('.ckpt' in self.model_name), config=config) + config = config_class.from_pretrained(self.config_name if self.config_name else self.model_name_or_path) + self.tokenizer = tokenizer_class.from_pretrained(self.tokenizer_name if self.tokenizer_name else self.model_name_or_path, do_lower_case=self.do_lower_case) + self.model = self.model_class.from_pretrained(self.model_name_or_path, from_tf=bool('.ckpt' in self.model_name_or_path), config=config) if self.local_rank == 0: torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab @@ -430,7 +464,7 @@ def fit(self, X, y=None): global_step, tr_loss = train(self, train_dataset, self.model, self.tokenizer) logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) - # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained() + # Save the trained model and the tokenizer if self.local_rank == -1 or torch.distributed.get_rank() == 0: # Create output directory if needed if not os.path.exists(self.output_dir) and self.local_rank in [-1, 0]: @@ -450,21 +484,30 @@ def fit(self, X, y=None): def evaluate(self, X): + # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory results = {} if self.local_rank in [-1, 0]: checkpoints = [self.output_dir] if self.eval_all_checkpoints: checkpoints = list(os.path.dirname(c) for c 
in sorted(glob.glob(self.output_dir + '/**/' + WEIGHTS_NAME, recursive=True))) - logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN) # Reduce logging + logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN) # Reduce model loading logs + logger.info("Evaluate the following checkpoints: %s", checkpoints) + for checkpoint in checkpoints: + # Reload the model global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else "" self.model = self.model_class.from_pretrained(checkpoint) self.model.to(self.device) + + # Evaluate result = evaluate(self, self.model, self.tokenizer, prefix=global_step) + result = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in result.items()) results.update(result) + logger.info("Results: {}".format(results)) + return results def predict(self, X): From 1681d971c35146df6cd268317910d70c8b3acbb0 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Tue, 16 Jul 2019 16:51:15 +0200 Subject: [PATCH 13/43] eval script SQuAD update --- .../utils_squad_evaluate.py | 43 ++++++++++++++++++- cdqa/reader/utils_squad_evaluate.py | 43 ++++++++++++++++++- 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/cdqa/reader/hf_original_examples/utils_squad_evaluate.py b/cdqa/reader/hf_original_examples/utils_squad_evaluate.py index d0cf643f..ed162e6f 100644 --- a/cdqa/reader/hf_original_examples/utils_squad_evaluate.py +++ b/cdqa/reader/hf_original_examples/utils_squad_evaluate.py @@ -1,4 +1,5 @@ -"""Official evaluation script for SQuAD version 2.0. +""" Official evaluation script for SQuAD version 2.0. + Modified by XLNet authors to update `find_best_threshold` scripts for SQuAD V2.0 In addition to basic functionality, we also compute additional statistics and plot precision-recall curves if an additional na_prob.json file is provided. 
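
As an aside before the evaluation-script changes continue below, the `Reader.evaluate()` method shown above discovers checkpoints by globbing recursively for the weights file. A minimal stand-alone sketch of that discovery step (the output directory is a placeholder path, and `WEIGHTS_NAME` is the `'pytorch_model.bin'` constant exported by pytorch-transformers):

```python
import glob
import os

WEIGHTS_NAME = 'pytorch_model.bin'  # value of pytorch_transformers.WEIGHTS_NAME
output_dir = 'models/xlnet_squad'   # placeholder path

# Every directory that contains a weights file counts as a checkpoint;
# sorting keeps the evaluation order deterministic.
checkpoints = list(os.path.dirname(c) for c in
                   sorted(glob.glob(output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))

for checkpoint in checkpoints:
    # e.g. 'models/xlnet_squad/checkpoint-500' -> global_step '500'
    global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
    print(checkpoint, global_step)
```
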
@@ -232,6 +233,36 @@ def find_best_thresh(preds, scores, na_probs, qid_to_has_ans): best_thresh = na_probs[qid] return 100.0 * best_score / len(scores), best_thresh +def find_best_thresh_v2(preds, scores, na_probs, qid_to_has_ans): + num_no_ans = sum(1 for k in qid_to_has_ans if not qid_to_has_ans[k]) + cur_score = num_no_ans + best_score = cur_score + best_thresh = 0.0 + qid_list = sorted(na_probs, key=lambda k: na_probs[k]) + for i, qid in enumerate(qid_list): + if qid not in scores: continue + if qid_to_has_ans[qid]: + diff = scores[qid] + else: + if preds[qid]: + diff = -1 + else: + diff = 0 + cur_score += diff + if cur_score > best_score: + best_score = cur_score + best_thresh = na_probs[qid] + + has_ans_score, has_ans_cnt = 0, 0 + for qid in qid_list: + if not qid_to_has_ans[qid]: continue + has_ans_cnt += 1 + + if qid not in scores: continue + has_ans_score += scores[qid] + + return 100.0 * best_score / len(scores), best_thresh, 1.0 * has_ans_score / has_ans_cnt + def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, qid_to_has_ans): best_exact, exact_thresh = find_best_thresh(preds, exact_raw, na_probs, qid_to_has_ans) best_f1, f1_thresh = find_best_thresh(preds, f1_raw, na_probs, qid_to_has_ans) @@ -240,6 +271,16 @@ def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, qid_to_h main_eval['best_f1'] = best_f1 main_eval['best_f1_thresh'] = f1_thresh +def find_all_best_thresh_v2(main_eval, preds, exact_raw, f1_raw, na_probs, qid_to_has_ans): + best_exact, exact_thresh, has_ans_exact = find_best_thresh_v2(preds, exact_raw, na_probs, qid_to_has_ans) + best_f1, f1_thresh, has_ans_f1 = find_best_thresh_v2(preds, f1_raw, na_probs, qid_to_has_ans) + main_eval['best_exact'] = best_exact + main_eval['best_exact_thresh'] = exact_thresh + main_eval['best_f1'] = best_f1 + main_eval['best_f1_thresh'] = f1_thresh + main_eval['has_ans_exact'] = has_ans_exact + main_eval['has_ans_f1'] = has_ans_f1 + def main(OPTS): with open(OPTS.data_file) as f: dataset_json = json.load(f) diff --git a/cdqa/reader/utils_squad_evaluate.py b/cdqa/reader/utils_squad_evaluate.py index d0cf643f..ed162e6f 100644 --- a/cdqa/reader/utils_squad_evaluate.py +++ b/cdqa/reader/utils_squad_evaluate.py @@ -1,4 +1,5 @@ -"""Official evaluation script for SQuAD version 2.0. +""" Official evaluation script for SQuAD version 2.0. + Modified by XLNet authors to update `find_best_threshold` scripts for SQuAD V2.0 In addition to basic functionality, we also compute additional statistics and plot precision-recall curves if an additional na_prob.json file is provided. 
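
The `find_best_thresh_v2` helper added above (and mirrored into this second copy of the evaluation script) sweeps question ids in order of ascending no-answer probability, keeps the cut-off that maximizes the overall score, and additionally reports the average score over answerable questions. A toy trace, assuming the package is importable as `cdqa.reader.utils_squad_evaluate` and using invented qids and probabilities:

```python
from cdqa.reader.utils_squad_evaluate import find_best_thresh_v2

# Invented toy data: q1 and q3 are answerable, q2 is not.
preds = {'q1': 'in 1867', 'q2': '', 'q3': 'blue'}
exact_scores = {'q1': 1.0, 'q2': 0.0, 'q3': 1.0}  # per-question exact-match scores
na_probs = {'q1': 0.1, 'q2': 0.9, 'q3': 0.2}      # predicted no-answer probabilities
qid_to_has_ans = {'q1': True, 'q2': False, 'q3': True}

best, thresh, has_ans = find_best_thresh_v2(preds, exact_scores, na_probs, qid_to_has_ans)
print(best, thresh, has_ans)  # -> 100.0 0.2 1.0 for this toy data:
# 100.0 is the best achievable overall score, 0.2 the na_prob cut-off that reaches it,
# and 1.0 the average score over answerable questions (the extra v2 statistic).
```
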
@@ -232,6 +233,36 @@ def find_best_thresh(preds, scores, na_probs, qid_to_has_ans): best_thresh = na_probs[qid] return 100.0 * best_score / len(scores), best_thresh +def find_best_thresh_v2(preds, scores, na_probs, qid_to_has_ans): + num_no_ans = sum(1 for k in qid_to_has_ans if not qid_to_has_ans[k]) + cur_score = num_no_ans + best_score = cur_score + best_thresh = 0.0 + qid_list = sorted(na_probs, key=lambda k: na_probs[k]) + for i, qid in enumerate(qid_list): + if qid not in scores: continue + if qid_to_has_ans[qid]: + diff = scores[qid] + else: + if preds[qid]: + diff = -1 + else: + diff = 0 + cur_score += diff + if cur_score > best_score: + best_score = cur_score + best_thresh = na_probs[qid] + + has_ans_score, has_ans_cnt = 0, 0 + for qid in qid_list: + if not qid_to_has_ans[qid]: continue + has_ans_cnt += 1 + + if qid not in scores: continue + has_ans_score += scores[qid] + + return 100.0 * best_score / len(scores), best_thresh, 1.0 * has_ans_score / has_ans_cnt + def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, qid_to_has_ans): best_exact, exact_thresh = find_best_thresh(preds, exact_raw, na_probs, qid_to_has_ans) best_f1, f1_thresh = find_best_thresh(preds, f1_raw, na_probs, qid_to_has_ans) @@ -240,6 +271,16 @@ def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, qid_to_h main_eval['best_f1'] = best_f1 main_eval['best_f1_thresh'] = f1_thresh +def find_all_best_thresh_v2(main_eval, preds, exact_raw, f1_raw, na_probs, qid_to_has_ans): + best_exact, exact_thresh, has_ans_exact = find_best_thresh_v2(preds, exact_raw, na_probs, qid_to_has_ans) + best_f1, f1_thresh, has_ans_f1 = find_best_thresh_v2(preds, f1_raw, na_probs, qid_to_has_ans) + main_eval['best_exact'] = best_exact + main_eval['best_exact_thresh'] = exact_thresh + main_eval['best_f1'] = best_f1 + main_eval['best_f1_thresh'] = f1_thresh + main_eval['has_ans_exact'] = has_ans_exact + main_eval['has_ans_f1'] = has_ans_f1 + def main(OPTS): with open(OPTS.data_file) as f: dataset_json = json.load(f) From 2c86a94eb973f39993e5d4da8b9961a0550c6b54 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Tue, 16 Jul 2019 17:01:48 +0200 Subject: [PATCH 14/43] sync with pytorch-transformers 1.0 --- .../hf_original_examples/utils_squad.py | 295 ++++++++++++++++-- cdqa/reader/utils_squad.py | 294 +++++++++++++++-- 2 files changed, 547 insertions(+), 42 deletions(-) diff --git a/cdqa/reader/hf_original_examples/utils_squad.py b/cdqa/reader/hf_original_examples/utils_squad.py index 305eeb7b..34a0c9cc 100644 --- a/cdqa/reader/hf_original_examples/utils_squad.py +++ b/cdqa/reader/hf_original_examples/utils_squad.py @@ -26,6 +26,9 @@ from pytorch_transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize +# Required by XLNet evaluation method to compute optimal threshold (see write_predictions_extended() method) +from utils_squad_evaluate import find_all_best_thresh_v2, make_qid_to_has_ans, get_raw_scores + logger = logging.getLogger(__name__) @@ -82,6 +85,9 @@ def __init__(self, input_ids, input_mask, segment_ids, + cls_index, + p_mask, + paragraph_len, start_position=None, end_position=None, is_impossible=None): @@ -94,6 +100,9 @@ def __init__(self, self.input_ids = input_ids self.input_mask = input_mask self.segment_ids = segment_ids + self.cls_index = cls_index + self.p_mask = p_mask + self.paragraph_len = paragraph_len self.start_position = start_position self.end_position = end_position self.is_impossible = is_impossible @@ -178,13 +187,25 @@ 
def is_whitespace(c):

 def convert_examples_to_features(examples, tokenizer, max_seq_length,
-                                 doc_stride, max_query_length, is_training):
+                                 doc_stride, max_query_length, is_training,
+                                 cls_token_at_end=False,
+                                 cls_token='[CLS]', sep_token='[SEP]', pad_token=0,
+                                 sequence_a_segment_id=0, sequence_b_segment_id=1,
+                                 cls_token_segment_id=0, pad_token_segment_id=0,
+                                 mask_padding_with_zero=True):
     """Loads a data file into a list of `InputBatch`s."""

     unique_id = 1000000000
+    # cnt_pos, cnt_neg = 0, 0
+    # max_N, max_M = 1024, 1024
+    # f = np.zeros((max_N, max_M), dtype=np.float32)

     features = []
     for (example_index, example) in enumerate(examples):
+
+        # if example_index % 100 == 0:
+        #     logger.info('Converting %s/%s pos %s neg %s', example_index, len(examples), cnt_pos, cnt_neg)
+
         query_tokens = tokenizer.tokenize(example.question_text)

         if len(query_tokens) > max_query_length:
@@ -239,14 +260,30 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
             token_to_orig_map = {}
             token_is_max_context = {}
             segment_ids = []
-            tokens.append("[CLS]")
-            segment_ids.append(0)
+
+            # p_mask: mask with 1 for tokens that cannot be in the answer (0 for tokens which can be in an answer)
+            # The original TF implementation also keeps the classification token (set to 0) (not sure why...)
+            p_mask = []
+
+            # CLS token at the beginning
+            if not cls_token_at_end:
+                tokens.append(cls_token)
+                segment_ids.append(cls_token_segment_id)
+                p_mask.append(0)
+                cls_index = 0
+
+            # Query
             for token in query_tokens:
                 tokens.append(token)
-                segment_ids.append(0)
-            tokens.append("[SEP]")
-            segment_ids.append(0)
+                segment_ids.append(sequence_a_segment_id)
+                p_mask.append(1)

+            # SEP token
+            tokens.append(sep_token)
+            segment_ids.append(sequence_a_segment_id)
+            p_mask.append(1)
+
+            # Paragraph
             for i in range(doc_span.length):
                 split_token_index = doc_span.start + i
                 token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]
@@ -255,29 +292,43 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
                                                        split_token_index)
                 token_is_max_context[len(tokens)] = is_max_context
                 tokens.append(all_doc_tokens[split_token_index])
-                segment_ids.append(1)
-            tokens.append("[SEP]")
-            segment_ids.append(1)
+                segment_ids.append(sequence_b_segment_id)
+                p_mask.append(0)
+            paragraph_len = doc_span.length
+
+            # SEP token
+            tokens.append(sep_token)
+            segment_ids.append(sequence_b_segment_id)
+            p_mask.append(1)
+
+            # CLS token at the end
+            if cls_token_at_end:
+                tokens.append(cls_token)
+                segment_ids.append(cls_token_segment_id)
+                p_mask.append(0)
+                cls_index = len(tokens) - 1  # Index of classification token

             input_ids = tokenizer.convert_tokens_to_ids(tokens)

             # The mask has 1 for real tokens and 0 for padding tokens. Only real
             # tokens are attended to.
-            input_mask = [1] * len(input_ids)
+            input_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)

             # Zero-pad up to the sequence length. 
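            # (Illustration: with max_seq_length = 8 and five real tokens, the padding
            # loop below leaves input_mask as [1, 1, 1, 1, 1, 0, 0, 0] when
            # mask_padding_with_zero is True, and appends 1 to p_mask for each pad.)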
while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) + input_ids.append(pad_token) + input_mask.append(0 if mask_padding_with_zero else 1) + segment_ids.append(pad_token_segment_id) + p_mask.append(1) assert len(input_ids) == max_seq_length assert len(input_mask) == max_seq_length assert len(segment_ids) == max_seq_length + span_is_impossible = example.is_impossible start_position = None end_position = None - if is_training and not example.is_impossible: + if is_training and not span_is_impossible: # For training, if our document chunk does not contain an annotation # we throw it out, since there is nothing to predict. doc_start = doc_span.start @@ -289,13 +340,16 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, if out_of_span: start_position = 0 end_position = 0 + span_is_impossible = True else: doc_offset = len(query_tokens) + 2 start_position = tok_start_position - doc_start + doc_offset end_position = tok_end_position - doc_start + doc_offset - if is_training and example.is_impossible: - start_position = 0 - end_position = 0 + + if is_training and span_is_impossible: + start_position = cls_index + end_position = cls_index + if example_index < 20: logger.info("*** Example ***") logger.info("unique_id: %s" % (unique_id)) @@ -312,9 +366,9 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, "input_mask: %s" % " ".join([str(x) for x in input_mask])) logger.info( "segment_ids: %s" % " ".join([str(x) for x in segment_ids])) - if is_training and example.is_impossible: + if is_training and span_is_impossible: logger.info("impossible example") - if is_training and not example.is_impossible: + if is_training and not span_is_impossible: answer_text = " ".join(tokens[start_position:(end_position + 1)]) logger.info("start_position: %d" % (start_position)) logger.info("end_position: %d" % (end_position)) @@ -332,9 +386,12 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids, + cls_index=cls_index, + p_mask=p_mask, + paragraph_len=paragraph_len, start_position=start_position, end_position=end_position, - is_impossible=example.is_impossible)) + is_impossible=span_is_impossible)) unique_id += 1 return features @@ -417,7 +474,6 @@ def _check_is_max_context(doc_spans, cur_span_index, position): RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"]) - def write_predictions(all_examples, all_features, all_results, n_best_size, max_answer_length, do_lower_case, output_prediction_file, output_nbest_file, output_null_log_odds_file, verbose_logging, @@ -612,6 +668,203 @@ def write_predictions(all_examples, all_features, all_results, n_best_size, return all_predictions +# For XLNet (and XLM which uses the same head) +RawResultExtended = collections.namedtuple("RawResultExtended", + ["unique_id", "start_top_log_probs", "start_top_index", + "end_top_log_probs", "end_top_index", "cls_logits"]) + + +def write_predictions_extended(all_examples, all_features, all_results, n_best_size, + max_answer_length, output_prediction_file, + output_nbest_file, + output_null_log_odds_file, orig_data_file, + start_n_top, end_n_top, version_2_with_negative, + tokenizer, verbose_logging): + """ XLNet write prediction logic (more complex than Bert's). + Write final predictions to the json file and log-odds of null if needed. 
+ + Requires utils_squad_evaluate.py + """ + _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name + "PrelimPrediction", + ["feature_index", "start_index", "end_index", + "start_log_prob", "end_log_prob"]) + + _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name + "NbestPrediction", ["text", "start_log_prob", "end_log_prob"]) + + logger.info("Writing predictions to: %s", output_prediction_file) + # logger.info("Writing nbest to: %s" % (output_nbest_file)) + + example_index_to_features = collections.defaultdict(list) + for feature in all_features: + example_index_to_features[feature.example_index].append(feature) + + unique_id_to_result = {} + for result in all_results: + unique_id_to_result[result.unique_id] = result + + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + scores_diff_json = collections.OrderedDict() + + for (example_index, example) in enumerate(all_examples): + features = example_index_to_features[example_index] + + prelim_predictions = [] + # keep track of the minimum score of null start+end of position 0 + score_null = 1000000 # large and positive + + for (feature_index, feature) in enumerate(features): + result = unique_id_to_result[feature.unique_id] + + cur_null_score = result.cls_logits + + # if we could have irrelevant answers, get the min score of irrelevant + score_null = min(score_null, cur_null_score) + + for i in range(start_n_top): + for j in range(end_n_top): + start_log_prob = result.start_top_log_probs[i] + start_index = result.start_top_index[i] + + j_index = i * end_n_top + j + + end_log_prob = result.end_top_log_probs[j_index] + end_index = result.end_top_index[j_index] + + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. + if start_index >= feature.paragraph_len - 1: + continue + if end_index >= feature.paragraph_len - 1: + continue + + if not feature.token_is_max_context.get(start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > max_answer_length: + continue + + prelim_predictions.append( + _PrelimPrediction( + feature_index=feature_index, + start_index=start_index, + end_index=end_index, + start_log_prob=start_log_prob, + end_log_prob=end_log_prob)) + + prelim_predictions = sorted( + prelim_predictions, + key=lambda x: (x.start_log_prob + x.end_log_prob), + reverse=True) + + seen_predictions = {} + nbest = [] + for pred in prelim_predictions: + if len(nbest) >= n_best_size: + break + feature = features[pred.feature_index] + + # XLNet un-tokenizer + # Let's keep it simple for now and see if we need all this later. 
+ # + # tok_start_to_orig_index = feature.tok_start_to_orig_index + # tok_end_to_orig_index = feature.tok_end_to_orig_index + # start_orig_pos = tok_start_to_orig_index[pred.start_index] + # end_orig_pos = tok_end_to_orig_index[pred.end_index] + # paragraph_text = example.paragraph_text + # final_text = paragraph_text[start_orig_pos: end_orig_pos + 1].strip() + + # Previously used Bert untokenizer + tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + orig_doc_start = feature.token_to_orig_map[pred.start_index] + orig_doc_end = feature.token_to_orig_map[pred.end_index] + orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + tok_text = tokenizer.convert_tokens_to_string(tok_tokens) + + # Clean whitespace + tok_text = tok_text.strip() + tok_text = " ".join(tok_text.split()) + orig_text = " ".join(orig_tokens) + + final_text = get_final_text(tok_text, orig_text, tokenizer.do_lower_case, + verbose_logging) + + if final_text in seen_predictions: + continue + + seen_predictions[final_text] = True + + nbest.append( + _NbestPrediction( + text=final_text, + start_log_prob=pred.start_log_prob, + end_log_prob=pred.end_log_prob)) + + # In very rare edge cases we could have no valid predictions. So we + # just create a nonce prediction in this case to avoid failure. + if not nbest: + nbest.append( + _NbestPrediction(text="", start_log_prob=-1e6, + end_log_prob=-1e6)) + + total_scores = [] + best_non_null_entry = None + for entry in nbest: + total_scores.append(entry.start_log_prob + entry.end_log_prob) + if not best_non_null_entry: + best_non_null_entry = entry + + probs = _compute_softmax(total_scores) + + nbest_json = [] + for (i, entry) in enumerate(nbest): + output = collections.OrderedDict() + output["text"] = entry.text + output["probability"] = probs[i] + output["start_log_prob"] = entry.start_log_prob + output["end_log_prob"] = entry.end_log_prob + nbest_json.append(output) + + assert len(nbest_json) >= 1 + assert best_non_null_entry is not None + + score_diff = score_null + scores_diff_json[example.qas_id] = score_diff + # note(zhiliny): always predict best_non_null_entry + # and the evaluation script will search for the best threshold + all_predictions[example.qas_id] = best_non_null_entry.text + + all_nbest_json[example.qas_id] = nbest_json + + with open(output_prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") + + with open(output_nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + "\n") + + if version_2_with_negative: + with open(output_null_log_odds_file, "w") as writer: + writer.write(json.dumps(scores_diff_json, indent=4) + "\n") + + with open(orig_data_file, "r", encoding='utf-8') as reader: + orig_data = json.load(reader)["data"] + + qid_to_has_ans = make_qid_to_has_ans(orig_data) + has_ans_qids = [k for k, v in qid_to_has_ans.items() if v] + no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v] + exact_raw, f1_raw = get_raw_scores(orig_data, all_predictions) + out_eval = {} + + find_all_best_thresh_v2(out_eval, all_predictions, exact_raw, f1_raw, scores_diff_json, qid_to_has_ans) + + return out_eval + + def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): """Project the tokenized prediction back to the original text.""" diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py index 27fafa02..6e1f3976 100644 --- a/cdqa/reader/utils_squad.py +++ b/cdqa/reader/utils_squad.py @@ -26,6 +26,9 @@ from pytorch_transformers.tokenization_bert 
import BasicTokenizer, whitespace_tokenize

+# Required by XLNet evaluation method to compute optimal threshold (see write_predictions_extended() method)
+from utils_squad_evaluate import find_all_best_thresh_v2, make_qid_to_has_ans, get_raw_scores
+
 logger = logging.getLogger(__name__)

@@ -86,6 +89,9 @@ def __init__(self,
                  input_ids,
                  input_mask,
                  segment_ids,
+                 cls_index,
+                 p_mask,
+                 paragraph_len,
                  start_position=None,
                  end_position=None,
                  is_impossible=None):
@@ -98,6 +104,9 @@ def __init__(self,
         self.input_ids = input_ids
         self.input_mask = input_mask
         self.segment_ids = segment_ids
+        self.cls_index = cls_index
+        self.p_mask = p_mask
+        self.paragraph_len = paragraph_len
         self.start_position = start_position
         self.end_position = end_position
         self.is_impossible = is_impossible
@@ -188,13 +197,25 @@ def is_whitespace(c):

 def convert_examples_to_features(examples, tokenizer, max_seq_length,
-                                 doc_stride, max_query_length, is_training, verbose):
+                                 doc_stride, max_query_length, is_training,
+                                 cls_token_at_end=False,
+                                 cls_token='[CLS]', sep_token='[SEP]', pad_token=0,
+                                 sequence_a_segment_id=0, sequence_b_segment_id=1,
+                                 cls_token_segment_id=0, pad_token_segment_id=0,
+                                 mask_padding_with_zero=True):
     """Loads a data file into a list of `InputBatch`s."""

     unique_id = 1000000000
+    # cnt_pos, cnt_neg = 0, 0
+    # max_N, max_M = 1024, 1024
+    # f = np.zeros((max_N, max_M), dtype=np.float32)

     features = []
     for (example_index, example) in enumerate(examples):
+
+        # if example_index % 100 == 0:
+        #     logger.info('Converting %s/%s pos %s neg %s', example_index, len(examples), cnt_pos, cnt_neg)
+
         query_tokens = tokenizer.tokenize(example.question_text)

         if len(query_tokens) > max_query_length:
@@ -249,14 +270,30 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
             token_to_orig_map = {}
             token_is_max_context = {}
             segment_ids = []
-            tokens.append("[CLS]")
-            segment_ids.append(0)
+
+            # p_mask: mask with 1 for tokens that cannot be in the answer (0 for tokens which can be in an answer)
+            # The original TF implementation also keeps the classification token (set to 0) (not sure why...)
+            p_mask = []
+
+            # CLS token at the beginning
+            if not cls_token_at_end:
+                tokens.append(cls_token)
+                segment_ids.append(cls_token_segment_id)
+                p_mask.append(0)
+                cls_index = 0
+
+            # Query
             for token in query_tokens:
                 tokens.append(token)
-                segment_ids.append(0)
-            tokens.append("[SEP]")
-            segment_ids.append(0)
+                segment_ids.append(sequence_a_segment_id)
+                p_mask.append(1)

+            # SEP token
+            tokens.append(sep_token)
+            segment_ids.append(sequence_a_segment_id)
+            p_mask.append(1)
+
+            # Paragraph
             for i in range(doc_span.length):
                 split_token_index = doc_span.start + i
                 token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]
@@ -265,29 +302,43 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
                                                        split_token_index)
                 token_is_max_context[len(tokens)] = is_max_context
                 tokens.append(all_doc_tokens[split_token_index])
-                segment_ids.append(1)
-            tokens.append("[SEP]")
-            segment_ids.append(1)
+                segment_ids.append(sequence_b_segment_id)
+                p_mask.append(0)
+            paragraph_len = doc_span.length
+
+            # SEP token
+            tokens.append(sep_token)
+            segment_ids.append(sequence_b_segment_id)
+            p_mask.append(1)
+
+            # CLS token at the end
+            if cls_token_at_end:
+                tokens.append(cls_token)
+                segment_ids.append(cls_token_segment_id)
+                p_mask.append(0)
+                cls_index = len(tokens) - 1  # Index of classification token

             input_ids = tokenizer.convert_tokens_to_ids(tokens)

             # The mask has 1 for real tokens and 0 for padding tokens. 
Only real # tokens are attended to. - input_mask = [1] * len(input_ids) + input_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) # Zero-pad up to the sequence length. while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) + input_ids.append(pad_token) + input_mask.append(0 if mask_padding_with_zero else 1) + segment_ids.append(pad_token_segment_id) + p_mask.append(1) assert len(input_ids) == max_seq_length assert len(input_mask) == max_seq_length assert len(segment_ids) == max_seq_length + span_is_impossible = example.is_impossible start_position = None end_position = None - if is_training and not example.is_impossible: + if is_training and not span_is_impossible: # For training, if our document chunk does not contain an annotation # we throw it out, since there is nothing to predict. doc_start = doc_span.start @@ -299,13 +350,16 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, if out_of_span: start_position = 0 end_position = 0 + span_is_impossible = True else: doc_offset = len(query_tokens) + 2 start_position = tok_start_position - doc_start + doc_offset end_position = tok_end_position - doc_start + doc_offset - if is_training and example.is_impossible: - start_position = 0 - end_position = 0 + + if is_training and span_is_impossible: + start_position = cls_index + end_position = cls_index + if example_index < 20 and verbose: logger.info("*** Example ***") logger.info("unique_id: %s" % (unique_id)) @@ -322,9 +376,9 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, "input_mask: %s" % " ".join([str(x) for x in input_mask])) logger.info( "segment_ids: %s" % " ".join([str(x) for x in segment_ids])) - if is_training and example.is_impossible: + if is_training and span_is_impossible: logger.info("impossible example") - if is_training and not example.is_impossible: + if is_training and not span_is_impossible: answer_text = " ".join(tokens[start_position:(end_position + 1)]) logger.info("start_position: %d" % (start_position)) logger.info("end_position: %d" % (end_position)) @@ -342,9 +396,12 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids, + cls_index=cls_index, + p_mask=p_mask, + paragraph_len=paragraph_len, start_position=start_position, end_position=end_position, - is_impossible=example.is_impossible)) + is_impossible=span_is_impossible)) unique_id += 1 return features @@ -427,7 +484,6 @@ def _check_is_max_context(doc_spans, cur_span_index, position): RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"]) - def write_predictions(all_examples, all_features, all_results, n_best_size, max_answer_length, do_lower_case, output_prediction_file, output_nbest_file, output_null_log_odds_file, verbose_logging, @@ -639,6 +695,202 @@ def write_predictions(all_examples, all_features, all_results, n_best_size, return final_prediction, all_predictions, all_nbest_json, scores_diff_json +# For XLNet (and XLM which uses the same head) +RawResultExtended = collections.namedtuple("RawResultExtended", + ["unique_id", "start_top_log_probs", "start_top_index", + "end_top_log_probs", "end_top_index", "cls_logits"]) + + +def write_predictions_extended(all_examples, all_features, all_results, n_best_size, + max_answer_length, output_prediction_file, + output_nbest_file, + output_null_log_odds_file, orig_data_file, + start_n_top, end_n_top, version_2_with_negative, + tokenizer, 
verbose_logging): + """ XLNet write prediction logic (more complex than Bert's). + Write final predictions to the json file and log-odds of null if needed. + + Requires utils_squad_evaluate.py + """ + _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name + "PrelimPrediction", + ["feature_index", "start_index", "end_index", + "start_log_prob", "end_log_prob"]) + + _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name + "NbestPrediction", ["text", "start_log_prob", "end_log_prob"]) + + logger.info("Writing predictions to: %s", output_prediction_file) + # logger.info("Writing nbest to: %s" % (output_nbest_file)) + + example_index_to_features = collections.defaultdict(list) + for feature in all_features: + example_index_to_features[feature.example_index].append(feature) + + unique_id_to_result = {} + for result in all_results: + unique_id_to_result[result.unique_id] = result + + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + scores_diff_json = collections.OrderedDict() + + for (example_index, example) in enumerate(all_examples): + features = example_index_to_features[example_index] + + prelim_predictions = [] + # keep track of the minimum score of null start+end of position 0 + score_null = 1000000 # large and positive + + for (feature_index, feature) in enumerate(features): + result = unique_id_to_result[feature.unique_id] + + cur_null_score = result.cls_logits + + # if we could have irrelevant answers, get the min score of irrelevant + score_null = min(score_null, cur_null_score) + + for i in range(start_n_top): + for j in range(end_n_top): + start_log_prob = result.start_top_log_probs[i] + start_index = result.start_top_index[i] + + j_index = i * end_n_top + j + + end_log_prob = result.end_top_log_probs[j_index] + end_index = result.end_top_index[j_index] + + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. + if start_index >= feature.paragraph_len - 1: + continue + if end_index >= feature.paragraph_len - 1: + continue + + if not feature.token_is_max_context.get(start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > max_answer_length: + continue + + prelim_predictions.append( + _PrelimPrediction( + feature_index=feature_index, + start_index=start_index, + end_index=end_index, + start_log_prob=start_log_prob, + end_log_prob=end_log_prob)) + + prelim_predictions = sorted( + prelim_predictions, + key=lambda x: (x.start_log_prob + x.end_log_prob), + reverse=True) + + seen_predictions = {} + nbest = [] + for pred in prelim_predictions: + if len(nbest) >= n_best_size: + break + feature = features[pred.feature_index] + + # XLNet un-tokenizer + # Let's keep it simple for now and see if we need all this later. 
+ # + # tok_start_to_orig_index = feature.tok_start_to_orig_index + # tok_end_to_orig_index = feature.tok_end_to_orig_index + # start_orig_pos = tok_start_to_orig_index[pred.start_index] + # end_orig_pos = tok_end_to_orig_index[pred.end_index] + # paragraph_text = example.paragraph_text + # final_text = paragraph_text[start_orig_pos: end_orig_pos + 1].strip() + + # Previously used Bert untokenizer + tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + orig_doc_start = feature.token_to_orig_map[pred.start_index] + orig_doc_end = feature.token_to_orig_map[pred.end_index] + orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + tok_text = tokenizer.convert_tokens_to_string(tok_tokens) + + # Clean whitespace + tok_text = tok_text.strip() + tok_text = " ".join(tok_text.split()) + orig_text = " ".join(orig_tokens) + + final_text = get_final_text(tok_text, orig_text, tokenizer.do_lower_case, + verbose_logging) + + if final_text in seen_predictions: + continue + + seen_predictions[final_text] = True + + nbest.append( + _NbestPrediction( + text=final_text, + start_log_prob=pred.start_log_prob, + end_log_prob=pred.end_log_prob)) + + # In very rare edge cases we could have no valid predictions. So we + # just create a nonce prediction in this case to avoid failure. + if not nbest: + nbest.append( + _NbestPrediction(text="", start_log_prob=-1e6, + end_log_prob=-1e6)) + + total_scores = [] + best_non_null_entry = None + for entry in nbest: + total_scores.append(entry.start_log_prob + entry.end_log_prob) + if not best_non_null_entry: + best_non_null_entry = entry + + probs = _compute_softmax(total_scores) + + nbest_json = [] + for (i, entry) in enumerate(nbest): + output = collections.OrderedDict() + output["text"] = entry.text + output["probability"] = probs[i] + output["start_log_prob"] = entry.start_log_prob + output["end_log_prob"] = entry.end_log_prob + nbest_json.append(output) + + assert len(nbest_json) >= 1 + assert best_non_null_entry is not None + + score_diff = score_null + scores_diff_json[example.qas_id] = score_diff + # note(zhiliny): always predict best_non_null_entry + # and the evaluation script will search for the best threshold + all_predictions[example.qas_id] = best_non_null_entry.text + + all_nbest_json[example.qas_id] = nbest_json + + with open(output_prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") + + with open(output_nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + "\n") + + if version_2_with_negative: + with open(output_null_log_odds_file, "w") as writer: + writer.write(json.dumps(scores_diff_json, indent=4) + "\n") + + with open(orig_data_file, "r", encoding='utf-8') as reader: + orig_data = json.load(reader)["data"] + + qid_to_has_ans = make_qid_to_has_ans(orig_data) + has_ans_qids = [k for k, v in qid_to_has_ans.items() if v] + no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v] + exact_raw, f1_raw = get_raw_scores(orig_data, all_predictions) + out_eval = {} + + find_all_best_thresh_v2(out_eval, all_predictions, exact_raw, f1_raw, scores_diff_json, qid_to_has_ans) + + return out_eval + def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): """Project the tokenized prediction back to the original text.""" From 3c34c6374d1d3d734aa4380f51c6b3dd830d21dd Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Tue, 16 Jul 2019 18:23:54 +0200 Subject: [PATCH 15/43] fix import errors --- 
 cdqa/reader/reader_sklearn.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py
index 0e605e52..d666e815 100644
--- a/cdqa/reader/reader_sklearn.py
+++ b/cdqa/reader/reader_sklearn.py
@@ -41,14 +41,14 @@

 from pytorch_transformers import AdamW, WarmupLinearSchedule

-from utils_squad import (read_squad_examples, convert_examples_to_features,
+from cdqa.reader.utils_squad import (read_squad_examples, convert_examples_to_features,
                          RawResult, write_predictions,
                          RawResultExtended, write_predictions_extended)

 # The following import is the official SQuAD evaluation script (2.0).
 # You can remove it from the dependencies if you are using this script outside of the library
 # We've added it here for automated tests (see examples/test_examples.py file)
-from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad
+from cdqa.reader.utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad

 from sklearn.base import BaseEstimator

From 02d81de5a227ce65b13d1a37ae54cfa0e660d7ac Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Tue, 16 Jul 2019 18:29:28 +0200
Subject: [PATCH 16/43] add new reqs

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 41c0f8d3..6e3a45e9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,6 +5,7 @@ pandas
 prettytable
 pytorch_pretrained_bert
 pytorch-transformers
+tensorboardX
 scikit_learn
 tika
 tqdm

From 95f214ad29361dadf669cf98591135b267d385a0 Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Tue, 16 Jul 2019 18:36:44 +0200
Subject: [PATCH 17/43] fix import error

---
 cdqa/reader/utils_squad.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py
index 6e1f3976..ee89ef0e 100644
--- a/cdqa/reader/utils_squad.py
+++ b/cdqa/reader/utils_squad.py
@@ -27,7 +27,7 @@
 from pytorch_transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize

 # Required by XLNet evaluation method to compute optimal threshold (see write_predictions_extended() method)
-from utils_squad_evaluate import find_all_best_thresh_v2, make_qid_to_has_ans, get_raw_scores
+from cdqa.reader.utils_squad_evaluate import find_all_best_thresh_v2, make_qid_to_has_ans, get_raw_scores

 logger = logging.getLogger(__name__)

From e911d4cc583570189c8ab454ce6c552890d432e9 Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Tue, 16 Jul 2019 18:45:03 +0200
Subject: [PATCH 18/43] fix change params Reader()

---
 cdqa/reader/reader_sklearn.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py
index d666e815..4145f762 100644
--- a/cdqa/reader/reader_sklearn.py
+++ b/cdqa/reader/reader_sklearn.py
@@ -357,7 +357,8 @@ def __init__(self,

         self.train_file = train_file
         self.predict_file = predict_file
-        self.model_name = model_name
+        self.model_type = model_type
+        self.model_name_or_path = model_name_or_path
         self.output_dir = output_dir
         self.config_name = config_name
         self.tokenizer_name = tokenizer_name

From 53887d777efb1f772d3190707e8cc88ce514d4cf Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Tue, 16 Jul 2019 19:02:42 +0200
Subject: [PATCH 19/43] remove verbose debug

---
 cdqa/reader/utils_squad.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
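
Taken together, patches 15 to 18 make the reader importable as a package module and replace the single `model_name` argument with explicit `model_type` and `model_name_or_path` parameters. A hedged sketch of how the refactored estimator would then be driven; the file paths and output directory below are placeholders, and the remaining constructor arguments are assumed to keep their defaults:

```python
from cdqa.reader.reader_sklearn import Reader

reader = Reader(model_type='xlnet',
                model_name_or_path='xlnet-base-cased',
                train_file='data/train-v2.0.json',   # placeholder paths
                predict_file='data/dev-v2.0.json',
                output_dir='models/xlnet_squad')

# X is accepted for scikit-learn compatibility; this sketch assumes the
# estimator actually reads the train_file/predict_file set above.
reader.fit(X=None)
results = reader.evaluate(X=None)
```
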
diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py
index ee89ef0e..86e800d3 100644
--- a/cdqa/reader/utils_squad.py
+++ b/cdqa/reader/utils_squad.py
@@ -360,7 +360,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
             start_position = cls_index
             end_position = cls_index

-            if example_index < 20 and verbose:
+            if example_index < 20:
                 logger.info("*** Example ***")
                 logger.info("unique_id: %s" % (unique_id))
                 logger.info("example_index: %s" % (example_index))

From 3501a403f855e42d03f3129725c20cbf8e198f65 Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Tue, 16 Jul 2019 19:44:22 +0200
Subject: [PATCH 20/43] update training notebook

---
 examples/tutorial-train-xlnet-squad.ipynb | 846 +++++++++++++++++++---
 1 file changed, 726 insertions(+), 120 deletions(-)

diff --git a/examples/tutorial-train-xlnet-squad.ipynb b/examples/tutorial-train-xlnet-squad.ipynb
index e1c54a9c..b6d08ae7 100644
--- a/examples/tutorial-train-xlnet-squad.ipynb
+++ b/examples/tutorial-train-xlnet-squad.ipynb
@@ -31,11 +31,11 @@
       "metadata": {
         "id": "zNtCqwveFjcK",
         "colab_type": "code",
+        "outputId": "5976e1e4-179d-463f-8271-2436e0d32a4e",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 153
-        },
-        "outputId": "03a577cd-62e2-405e-aa3e-a35314400191"
+          "height": 151
+        }
       },
       "source": [
         "!git clone https://github.com/cdqa-suite/cdQA.git"
       ],
       "execution_count": 2,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
             "Cloning into 'cdQA'...\n",
-            "remote: Enumerating objects: 45, done.\u001b[K\n",
-            "remote: Counting objects: 100% (45/45), done.\u001b[K\n",
-            "remote: Compressing objects: 100% (35/35), done.\u001b[K\n",
-            "remote: Total 841 (delta 22), reused 25 (delta 10), pack-reused 796\u001b[K\n",
-            "Receiving objects: 100% (841/841), 266.80 KiB | 11.12 MiB/s, done.\n",
-            "
\rResolving deltas: 100% (489/489), done.\n" + "remote: Enumerating objects: 100, done.\u001b[K\n", + "remote: Counting objects: 100% (100/100), done.\u001b[K\n", + "remote: Compressing objects: 100% (77/77), done.\u001b[K\n", + "remote: Total 896 (delta 60), reused 53 (delta 23), pack-reused 796\n", + "Receiving objects: 100% (896/896), 287.73 KiB | 805.00 KiB/s, done.\n", + "Resolving deltas: 100% (527/527), done.\n" ], "name": "stdout" } @@ -72,63 +72,21 @@ "execution_count": 0, "outputs": [] }, - { - "cell_type": "code", - "metadata": { - "id": "i8-WoI4eFw_E", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 366 - }, - "outputId": "4a564950-1d3f-4627-8fa2-63c61fa579de" - }, - "source": [ - "!ls -la" - ], - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "text": [ - "total 84\n", - "drwxr-xr-x 7 root root 4096 Jul 15 15:04 .\n", - "drwxr-xr-x 1 root root 4096 Jul 15 15:04 ..\n", - "-rw-r--r-- 1 root root 815 Jul 15 15:04 api.py\n", - "-rw-r--r-- 1 root root 22 Jul 15 15:04 apt.txt\n", - "drwxr-xr-x 6 root root 4096 Jul 15 15:04 cdqa\n", - "-rw-r--r-- 1 root root 1452 Jul 15 15:04 download.py\n", - "drwxr-xr-x 2 root root 4096 Jul 15 15:04 examples\n", - "drwxr-xr-x 8 root root 4096 Jul 15 15:04 .git\n", - "drwxr-xr-x 3 root root 4096 Jul 15 15:04 .github\n", - "-rw-r--r-- 1 root root 1375 Jul 15 15:04 .gitignore\n", - "-rw-r--r-- 1 root root 11356 Jul 15 15:04 LICENSE\n", - "-rw-r--r-- 1 root root 40 Jul 15 15:04 MANIFEST.in\n", - "-rw-r--r-- 1 root root 12184 Jul 15 15:04 README.md\n", - "-rw-r--r-- 1 root root 95 Jul 15 15:04 requirements.txt\n", - "-rw-r--r-- 1 root root 727 Jul 15 15:04 setup.py\n", - "drwxr-xr-x 2 root root 4096 Jul 15 15:04 tests\n", - "-rw-r--r-- 1 root root 312 Jul 15 15:04 .travis.yml\n" - ], - "name": "stdout" - } - ] - }, { "cell_type": "code", "metadata": { "id": "5jBtSKczGF38", "colab_type": "code", + "outputId": "1ca2a098-6850-40cf-a86f-b18ef9a2d47f", "colab": { "base_uri": "https://localhost:8080/", - "height": 56 - }, - "outputId": "d6cd8cdc-94dd-432c-9df9-7ac93b25c5f0" + "height": 55 + } }, "source": [ "!git checkout sync-huggingface" ], - "execution_count": 4, + "execution_count": 3, "outputs": [ { "output_type": "stream", @@ -145,16 +103,16 @@ "metadata": { "id": "DHl2HUX1GRd6", "colab_type": "code", + "outputId": "6395a1be-09c2-4cb9-a431-35b19fac0e74", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 - }, - "outputId": "7dd4a79c-827a-4491-dc6b-7ab369dfbf3e" + } }, "source": [ "!pip install -e ." 
], - "execution_count": 5, + "execution_count": 4, "outputs": [ { "output_type": "stream", @@ -168,7 +126,13 @@ "Requirement already satisfied: prettytable in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.7.2)\n", "Collecting pytorch_pretrained_bert (from cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d7/e0/c08d5553b89973d9a240605b9c12404bcf8227590de62bae27acbcfe076b/pytorch_pretrained_bert-0.6.2-py3-none-any.whl (123kB)\n", - "\u001b[K |████████████████████████████████| 133kB 4.1MB/s \n", + "\u001b[K |████████████████████████████████| 133kB 4.2MB/s \n", + "\u001b[?25hCollecting pytorch-transformers (from cdqa==1.0.3)\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/40/b5/2d78e74001af0152ee61d5ad4e290aec9a1e43925b21df2dc74ec100f1ab/pytorch_transformers-1.0.0-py3-none-any.whl (137kB)\n", + "\u001b[K |████████████████████████████████| 143kB 43.9MB/s \n", + "\u001b[?25hCollecting tensorboardX (from cdqa==1.0.3)\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/c3/12/dcaf67e1312475b26db9e45e7bb6f32b540671a9ee120b3a72d9e09bc517/tensorboardX-1.8-py2.py3-none-any.whl (216kB)\n", + "\u001b[K |████████████████████████████████| 225kB 34.4MB/s \n", "\u001b[?25hRequirement already satisfied: scikit_learn in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.21.2)\n", "Collecting tika (from cdqa==1.0.3)\n", " Downloading https://files.pythonhosted.org/packages/10/75/b566e446ffcf292f10c8d84c15a3d91615fe3d7ca8072a17c949d4e84b66/tika-1.19.tar.gz\n", @@ -181,24 +145,28 @@ "Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (1.1.0)\n", "Requirement already satisfied: Six in /usr/local/lib/python3.6/dist-packages (from flask_cors->cdqa==1.0.3) (1.12.0)\n", "Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (2018.9)\n", - "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (1.16.4)\n", "Requirement already satisfied: python-dateutil>=2.5.0 in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (2.5.3)\n", + "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (1.16.4)\n", "Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.9.185)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (2.21.0)\n", "Collecting regex (from pytorch_pretrained_bert->cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/6f/4e/1b178c38c9a1a184288f72065a65ca01f3154df43c6ad898624149b8b4e0/regex-2019.06.08.tar.gz (651kB)\n", - "\u001b[K |████████████████████████████████| 655kB 8.9MB/s \n", - "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (2.21.0)\n", - "Requirement already satisfied: torch>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.1.0)\n", + "\u001b[K |████████████████████████████████| 655kB 29.0MB/s \n", + "\u001b[?25hRequirement already satisfied: torch>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.1.0)\n", + "Collecting sentencepiece (from pytorch-transformers->cdqa==1.0.3)\n", + "\u001b[?25l 
Downloading https://files.pythonhosted.org/packages/00/95/7f357995d5eb1131aa2092096dca14a6fc1b1d2860bd99c22a612e1d1019/sentencepiece-0.1.82-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\n", + "\u001b[K |████████████████████████████████| 1.0MB 33.5MB/s \n", + "\u001b[?25hRequirement already satisfied: protobuf>=3.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorboardX->cdqa==1.0.3) (3.7.1)\n", "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.6/dist-packages (from scikit_learn->cdqa==1.0.3) (1.3.0)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from tika->cdqa==1.0.3) (41.0.1)\n", "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->Flask->cdqa==1.0.3) (1.1.1)\n", "Requirement already satisfied: s3transfer<0.3.0,>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.2.1)\n", - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.9.4)\n", "Requirement already satisfied: botocore<1.13.0,>=1.12.185 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (1.12.185)\n", - "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (3.0.4)\n", + "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.9.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2019.6.16)\n", - "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (1.24.3)\n", "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2.8)\n", + "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (1.24.3)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (3.0.4)\n", "Requirement already satisfied: docutils>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.13.0,>=1.12.185->boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.14)\n", "Building wheels for collected packages: tika, wget, regex\n", " Building wheel for tika (setup.py) ... \u001b[?25l\u001b[?25hdone\n", @@ -208,9 +176,9 @@ " Building wheel for regex (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Stored in directory: /root/.cache/pip/wheels/35/e4/80/abf3b33ba89cf65cd262af8a22a5a999cc28fbfabea6b38473\n", "Successfully built tika wget regex\n", - "Installing collected packages: flask-cors, regex, pytorch-pretrained-bert, tika, wget, cdqa\n", + "Installing collected packages: flask-cors, regex, pytorch-pretrained-bert, sentencepiece, pytorch-transformers, tensorboardX, tika, wget, cdqa\n", " Running setup.py develop for cdqa\n", - "Successfully installed cdqa flask-cors-3.0.8 pytorch-pretrained-bert-0.6.2 regex-2019.6.8 tika-1.19 wget-3.2\n" + "Successfully installed cdqa flask-cors-3.0.8 pytorch-pretrained-bert-0.6.2 pytorch-transformers-1.0.0 regex-2019.6.8 sentencepiece-0.1.82 tensorboardX-1.8 tika-1.19 wget-3.2\n" ], "name": "stdout" } @@ -244,57 +212,111 @@ "metadata": { "id": "50r4anYBITRO", "colab_type": "code", + "outputId": "c2891b00-1f52-412c-9377-7c2b2dd18510", "colab": { "base_uri": "https://localhost:8080/", - "height": 94 + "height": 457 + } + }, + "source": [ + "!ls -la" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "text": [ + "total 79828\n", + "drwxr-xr-x 8 root root 4096 Jul 16 17:19 .\n", + "drwxr-xr-x 1 root root 4096 Jul 16 17:18 ..\n", + "-rw-r--r-- 1 root root 815 Jul 16 17:18 api.py\n", + "-rw-r--r-- 1 root root 22 Jul 16 17:18 apt.txt\n", + "drwxr-xr-x 6 root root 4096 Jul 16 17:18 cdqa\n", + "drwxr-xr-x 2 root root 4096 Jul 16 17:19 cdqa.egg-info\n", + "-rw-r--r-- 1 root root 4854279 Jul 16 17:19 dev-v1.1.json\n", + "-rw-r--r-- 1 root root 4370528 Jul 16 17:19 dev-v2.0.json\n", + "-rw-r--r-- 1 root root 1452 Jul 16 17:18 download.py\n", + "drwxr-xr-x 2 root root 4096 Jul 16 17:18 examples\n", + "drwxr-xr-x 8 root root 4096 Jul 16 17:18 .git\n", + "drwxr-xr-x 3 root root 4096 Jul 16 17:18 .github\n", + "-rw-r--r-- 1 root root 1375 Jul 16 17:18 .gitignore\n", + "-rw-r--r-- 1 root root 11356 Jul 16 17:18 LICENSE\n", + "-rw-r--r-- 1 root root 40 Jul 16 17:18 MANIFEST.in\n", + "-rw-r--r-- 1 root root 12855 Jul 16 17:18 README.md\n", + "-rw-r--r-- 1 root root 129 Jul 16 17:18 requirements.txt\n", + "-rw-r--r-- 1 root root 727 Jul 16 17:18 setup.py\n", + "drwxr-xr-x 2 root root 4096 Jul 16 17:18 tests\n", + "-rw-r--r-- 1 root root 30288272 Jul 16 17:19 train-v1.1.json\n", + "-rw-r--r-- 1 root root 42123633 Jul 16 17:19 train-v2.0.json\n", + "-rw-r--r-- 1 root root 312 Jul 16 17:18 .travis.yml\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-25T14:21:08.091797Z", + "start_time": "2019-06-25T14:21:03.027877Z" }, - "outputId": "c4caa2d3-a1b1-4bd5-dece-36153bddf7d6" + "id": "umJkmO9HFf3L", + "colab_type": "code", + "outputId": "2ae27930-f375-40f1-da10-3e2e75c3c416", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 75 + } }, "source": [ - "!ls" + "import os\n", + "import torch\n", + "from sklearn.externals import joblib\n", + "from cdqa.reader.reader_sklearn import Reader" ], "execution_count": 7, "outputs": [ { "output_type": "stream", "text": [ - "api.py\t dev-v1.1.json LICENSE\t\tsetup.py\n", - "apt.txt dev-v2.0.json MANIFEST.in\ttests\n", - "cdqa\t download.py README.md\t\ttrain-v1.1.json\n", - "cdqa.egg-info examples requirements.txt\ttrain-v2.0.json\n" + "/usr/local/lib/python3.6/dist-packages/sklearn/externals/joblib/__init__.py:15: DeprecationWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. 
Please import this functionality directly from joblib, which can be installed with: pip install joblib. If this warning is raised when loading pickled models, you may need to re-serialize those models with scikit-learn 0.21+.\n", + " warnings.warn(msg, category=DeprecationWarning)\n" ], - "name": "stdout" + "name": "stderr" } ] }, { "cell_type": "code", "metadata": { - "id": "oXW27LR3KvrT", + "id": "ylorIsqLz_J3", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 191 + "height": 247 }, - "outputId": "d9f17f5d-bff0-40ae-880c-4f86fa75daec" + "outputId": "90dcdf41-69fe-4527-b0fc-c7c66b5ebb21" }, "source": [ - "!pip install tensorboardX" + "!wget https://raw.githubusercontent.com/huggingface/pytorch-transformers/master/examples/tests_samples/SQUAD/dev-v2.0-small.json" ], - "execution_count": 9, + "execution_count": 10, "outputs": [ { "output_type": "stream", "text": [ - "Collecting tensorboardX\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/c3/12/dcaf67e1312475b26db9e45e7bb6f32b540671a9ee120b3a72d9e09bc517/tensorboardX-1.8-py2.py3-none-any.whl (216kB)\n", - "\r\u001b[K |█▌ | 10kB 13.3MB/s eta 0:00:01\r\u001b[K |███ | 20kB 1.8MB/s eta 0:00:01\r\u001b[K |████▌ | 30kB 2.7MB/s eta 0:00:01\r\u001b[K |██████ | 40kB 1.7MB/s eta 0:00:01\r\u001b[K |███████▋ | 51kB 2.2MB/s eta 0:00:01\r\u001b[K |█████████ | 61kB 2.6MB/s eta 0:00:01\r\u001b[K |██████████▋ | 71kB 3.0MB/s eta 0:00:01\r\u001b[K |████████████▏ | 81kB 3.4MB/s eta 0:00:01\r\u001b[K |█████████████▋ | 92kB 3.8MB/s eta 0:00:01\r\u001b[K |███████████████▏ | 102kB 2.9MB/s eta 0:00:01\r\u001b[K |████████████████▊ | 112kB 2.9MB/s eta 0:00:01\r\u001b[K |██████████████████▏ | 122kB 2.9MB/s eta 0:00:01\r\u001b[K |███████████████████▊ | 133kB 2.9MB/s eta 0:00:01\r\u001b[K |█████████████████████▏ | 143kB 2.9MB/s eta 0:00:01\r\u001b[K |██████████████████████▊ | 153kB 2.9MB/s eta 0:00:01\r\u001b[K |████████████████████████▎ | 163kB 2.9MB/s eta 0:00:01\r\u001b[K |█████████████████████████▊ | 174kB 2.9MB/s eta 0:00:01\r\u001b[K |███████████████████████████▎ | 184kB 2.9MB/s eta 0:00:01\r\u001b[K |████████████████████████████▉ | 194kB 2.9MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▎ | 204kB 2.9MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▉| 215kB 2.9MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 225kB 2.9MB/s \n", - "\u001b[?25hRequirement already satisfied: protobuf>=3.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorboardX) (3.7.1)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from tensorboardX) (1.16.4)\n", - "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from tensorboardX) (1.12.0)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.2.0->tensorboardX) (41.0.1)\n", - "Installing collected packages: tensorboardX\n", - "Successfully installed tensorboardX-1.8\n" + "--2019-07-16 17:24:04-- https://raw.githubusercontent.com/huggingface/pytorch-transformers/master/examples/tests_samples/SQUAD/dev-v2.0-small.json\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 8786 (8.6K) [text/plain]\n", + "Saving to: ‘dev-v2.0-small.json’\n", + "\n", + "\rdev-v2.0-small.json 0%[ ] 0 --.-KB/s \rdev-v2.0-small.json 100%[===================>] 8.58K --.-KB/s in 0s \n", + "\n", + "2019-07-16 17:24:04 (126 MB/s) - ‘dev-v2.0-small.json’ saved [8786/8786]\n", + "\n" ], "name": "stdout" } @@ -303,46 +325,630 @@ { "cell_type": "code", "metadata": { - "ExecuteTime": { - "end_time": "2019-06-25T14:21:08.091797Z", - "start_time": "2019-06-25T14:21:03.027877Z" + "id": "9501kcG30SGd", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# !pip install apex" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "owyoli60qGb9", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 935 }, - "id": "umJkmO9HFf3L", + "outputId": "cc99edb0-f821-4afc-c347-3413f746dfa9" + }, + "source": [ + "reader = Reader(train_file='dev-v2.0-small.json',\n", + " predict_file='dev-v2.0-small.json',\n", + " model_type='xlnet',\n", + " model_name_or_path='xlnet-base-cased',\n", + " fp16=False,\n", + " output_dir='.')" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "text": [ + "07/16/2019 17:27:20 - WARNING - cdqa.reader.reader_sklearn - Process rank: -1, device: cpu, n_gpu: 1, distributed training: False, 16-bits training: False\n", + "07/16/2019 17:27:20 - INFO - pytorch_transformers.modeling_utils - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json from cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/16/2019 17:27:20 - INFO - pytorch_transformers.modeling_utils - Model config {\n", + " \"attn_type\": \"bi\",\n", + " \"bi_data\": false,\n", + " \"clamp_len\": -1,\n", + " \"d_head\": 64,\n", + " \"d_inner\": 3072,\n", + " \"d_model\": 768,\n", + " \"dropout\": 0.1,\n", + " \"end_n_top\": 5,\n", + " \"ff_activation\": \"gelu\",\n", + " \"finetuning_task\": null,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"mem_len\": null,\n", + " \"n_head\": 12,\n", + " \"n_layer\": 12,\n", + " \"n_token\": 32000,\n", + " \"num_labels\": 2,\n", + " \"output_attentions\": false,\n", + " \"output_hidden_states\": false,\n", + " \"reuse_len\": null,\n", + " \"same_length\": false,\n", + " \"start_n_top\": 5,\n", + " \"summary_activation\": \"tanh\",\n", + " \"summary_last_dropout\": 0.1,\n", + " \"summary_type\": \"last\",\n", + " \"summary_use_proj\": true,\n", + " \"torchscript\": false,\n", + " \"untie_r\": true\n", + "}\n", + "\n", + "07/16/2019 17:27:21 - INFO - pytorch_transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/16/2019 17:27:22 - INFO - pytorch_transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin from cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", + "07/16/2019 17:27:26 - INFO - pytorch_transformers.modeling_utils - Weights of 
XLNetForQuestionAnswering not initialized from pretrained model: ['start_logits.dense.weight', 'start_logits.dense.bias', 'end_logits.dense_0.weight', 'end_logits.dense_0.bias', 'end_logits.LayerNorm.weight', 'end_logits.LayerNorm.bias', 'end_logits.dense_1.weight', 'end_logits.dense_1.bias', 'answer_class.dense_0.weight', 'answer_class.dense_0.bias', 'answer_class.dense_1.weight']\n", + "07/16/2019 17:27:26 - INFO - pytorch_transformers.modeling_utils - Weights from pretrained model not used in XLNetForQuestionAnswering: ['lm_loss.weight', 'lm_loss.bias']\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.reader_sklearn - Training/evaluation parameters Reader(adam_epsilon=1e-08, cache_dir='', config_name='', do_eval=None,\n", + " do_lower_case=True, do_train=None, doc_stride=128,\n", + " eval_all_checkpoints=True, evaluate_during_training=True, fp16=False,\n", + " fp16_opt_level='O1', gradient_accumulation_steps=1, learning_rate=5e-05,\n", + " local_rank=-1, logging_steps=50, max_answer_length=30, max_grad_norm=1.0,\n", + " max_query_length=64, max_seq_length=384, max_steps=-1,\n", + " model_name_or_path='xlnet-base-cased', model_type='xlnet',\n", + " n_best_size=20, no_cuda=True, null_score_diff_threshold=0.0,\n", + " num_train_epochs=3.0, output_dir='.', overwrite_cache=True,\n", + " overwrite_output_dir=True, per_gpu_eval_batch_size=8, ...)\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "24eT2nuKtrqp", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 548 + "height": 1000 }, - "outputId": "2965eac0-1d9c-4699-f410-5437482e41c2" + "outputId": "9f64f34d-4782-4af3-832d-a892a7decd43" }, "source": [ - "import os\n", - "import torch\n", - "from sklearn.externals import joblib\n", - "from cdqa.reader.reader_sklearn import Reader\n", - "\n", - "reader = Reader(train_file='train-v2.0.json',\n", - " predict_file='dev-v2.0.json',\n", - " model_name='xlnet',\n", - " output_dir='.')\n", - "\n", - "reader.fit()" + "reader.fit(X='')" ], - "execution_count": 10, + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "text": [ + "07/16/2019 17:27:26 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at dev-v2.0-small.json\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁in ▁what ▁country ▁is ▁nor man dy ▁located ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . 
▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:1 14:1 15:2 16:2 17:2 18:2 19:2 20:3 21:3 22:3 23:3 24:3 25:3 26:4 27:4 28:5 29:5 30:5 31:5 32:6 33:6 34:6 35:7 36:7 37:7 38:8 39:9 40:10 41:11 42:12 43:13 44:14 45:14 46:15 47:16 48:16 49:17 50:18 51:19 52:20 53:21 54:22 55:22 56:22 57:22 58:23 59:24 60:25 61:26 62:26 63:26 64:26 65:27 66:28 67:29 68:30 69:31 70:31 71:32 72:32 73:32 74:32 75:32 76:32 77:33 78:34 79:35 80:35 81:35 82:35 83:35 84:35 85:35 86:36 87:36 88:37 89:38 90:39 91:40 92:40 93:40 94:40 95:41 96:41 97:42 98:43 99:43 100:44 101:44 102:45 103:46 104:47 105:48 106:48 107:48 108:49 109:50 110:51 111:52 112:52 113:52 114:53 115:54 116:55 117:55 118:55 119:56 120:56 121:57 122:58 123:59 124:59 125:59 126:59 127:60 128:61 129:62 130:63 131:64 132:65 133:66 134:67 135:68 136:69 137:69 138:70 139:71 140:71 141:71 142:71 143:71 144:71 145:72 146:72 147:73 148:74 149:75 150:76 151:77 152:78 153:79 154:80 155:80 156:80 157:80 158:80 159:81 160:82 161:83 162:84 163:84 164:84 165:84 166:85 167:86 168:87 169:88 170:89 171:90 172:91 173:92 174:93 175:93 176:93 177:94 178:95 179:96 180:97 181:98 182:99 183:100 184:101 185:102 186:102 187:103 188:103 189:104 190:105 191:106 192:107 193:108 194:109 195:110 196:111 197:112 198:112\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_ids: 0 25 113 234 27 2387 249 2087 798 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 
12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - start_position: 61\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - end_position: 63\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - answer: ▁ franc e\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000001\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 1\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁were ▁the ▁nor man s ▁in ▁nor man dy ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . 
▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 13:0 14:1 15:1 16:1 17:2 18:2 19:2 20:2 21:2 22:3 23:3 24:3 25:3 26:3 27:3 28:4 29:4 30:5 31:5 32:5 33:5 34:6 35:6 36:6 37:7 38:7 39:7 40:8 41:9 42:10 43:11 44:12 45:13 46:14 47:14 48:15 49:16 50:16 51:17 52:18 53:19 54:20 55:21 56:22 57:22 58:22 59:22 60:23 61:24 62:25 63:26 64:26 65:26 66:26 67:27 68:28 69:29 70:30 71:31 72:31 73:32 74:32 75:32 76:32 77:32 78:32 79:33 80:34 81:35 82:35 83:35 84:35 85:35 86:35 87:35 88:36 89:36 90:37 91:38 92:39 93:40 94:40 95:40 96:40 97:41 98:41 99:42 100:43 101:43 102:44 103:44 104:45 105:46 106:47 107:48 108:48 109:48 110:49 111:50 112:51 113:52 114:52 115:52 116:53 117:54 118:55 119:55 120:55 121:56 122:56 123:57 124:58 125:59 126:59 127:59 128:59 129:60 130:61 131:62 132:63 133:64 134:65 135:66 136:67 137:68 138:69 139:69 140:70 141:71 142:71 143:71 144:71 145:71 146:71 147:72 148:72 149:73 150:74 151:75 152:76 153:77 154:78 155:79 156:80 157:80 158:80 159:80 160:80 161:81 162:82 163:83 164:84 165:84 166:84 167:84 168:85 169:86 170:87 171:88 172:89 173:90 174:91 175:92 176:93 177:93 178:93 179:94 180:95 181:96 182:97 183:98 184:99 185:100 186:101 187:102 188:102 189:103 190:103 191:104 192:105 193:106 194:107 195:108 196:109 197:110 198:111 199:112 200:112\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_is_max_context: 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 
184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_ids: 0 90 55 18 2387 249 23 25 2387 249 2087 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - start_position: 46\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - end_position: 51\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - answer: ▁10 th ▁and ▁11 th ▁centuries\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000002\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - 
example_index: 2\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁from ▁which ▁countries ▁did ▁the ▁nor se ▁originate ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:1 14:1 15:2 16:2 17:2 18:2 19:2 20:3 21:3 22:3 23:3 24:3 25:3 26:4 27:4 28:5 29:5 30:5 31:5 32:6 33:6 34:6 35:7 36:7 37:7 38:8 39:9 40:10 41:11 42:12 43:13 44:14 45:14 46:15 47:16 48:16 49:17 50:18 51:19 52:20 53:21 54:22 55:22 56:22 57:22 58:23 59:24 60:25 61:26 62:26 63:26 64:26 65:27 66:28 67:29 68:30 69:31 70:31 71:32 72:32 73:32 74:32 75:32 76:32 77:33 78:34 79:35 80:35 81:35 82:35 83:35 84:35 85:35 86:36 87:36 88:37 89:38 90:39 91:40 92:40 93:40 94:40 95:41 96:41 97:42 98:43 99:43 100:44 101:44 102:45 103:46 104:47 105:48 106:48 107:48 108:49 109:50 110:51 111:52 112:52 113:52 114:53 115:54 116:55 117:55 118:55 119:56 120:56 121:57 122:58 123:59 124:59 125:59 126:59 127:60 128:61 129:62 130:63 131:64 132:65 133:66 134:67 135:68 136:69 137:69 138:70 139:71 140:71 141:71 142:71 143:71 144:71 145:72 146:72 147:73 148:74 149:75 150:76 151:77 152:78 153:79 154:80 155:80 156:80 157:80 158:80 159:81 160:82 161:83 162:84 163:84 164:84 165:84 166:85 167:86 168:87 169:88 170:89 171:90 172:91 173:92 174:93 175:93 176:93 177:94 178:95 179:96 180:97 181:98 182:99 183:100 184:101 185:102 186:102 187:103 188:103 189:104 190:105 191:106 192:107 193:108 194:109 195:110 196:111 197:112 198:112\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 
132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_ids: 0 40 59 452 190 18 2387 1022 19788 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - 
cdqa.reader.utils_squad - start_position: 91\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - end_position: 99\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - answer: ▁ den mark , ▁ice land ▁and ▁nor way\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000003\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 3\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁did ▁king ▁ char les ▁ iii ▁swear ▁fe al ty ▁to ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 16:0 17:1 18:1 19:1 20:2 21:2 22:2 23:2 24:2 25:3 26:3 27:3 28:3 29:3 30:3 31:4 32:4 33:5 34:5 35:5 36:5 37:6 38:6 39:6 40:7 41:7 42:7 43:8 44:9 45:10 46:11 47:12 48:13 49:14 50:14 51:15 52:16 53:16 54:17 55:18 56:19 57:20 58:21 59:22 60:22 61:22 62:22 63:23 64:24 65:25 66:26 67:26 68:26 69:26 70:27 71:28 72:29 73:30 74:31 75:31 76:32 77:32 78:32 79:32 80:32 81:32 82:33 83:34 84:35 85:35 86:35 87:35 88:35 89:35 90:35 91:36 92:36 93:37 94:38 95:39 96:40 97:40 98:40 99:40 100:41 101:41 102:42 103:43 104:43 105:44 106:44 107:45 108:46 109:47 110:48 111:48 112:48 113:49 114:50 115:51 116:52 117:52 118:52 119:53 120:54 121:55 122:55 123:55 124:56 125:56 126:57 127:58 128:59 129:59 130:59 131:59 132:60 133:61 134:62 135:63 136:64 137:65 138:66 139:67 140:68 141:69 142:69 143:70 144:71 145:71 146:71 147:71 148:71 149:71 150:72 151:72 152:73 153:74 154:75 155:76 156:77 157:78 158:79 159:80 160:80 161:80 162:80 163:80 164:81 165:82 166:83 167:84 168:84 169:84 170:84 171:85 172:86 173:87 174:88 175:89 176:90 177:91 178:92 179:93 180:93 181:93 182:94 183:95 184:96 185:97 186:98 187:99 188:100 189:101 190:102 191:102 192:103 193:103 194:104 195:105 196:106 197:107 198:108 199:109 200:110 201:111 202:112 203:112\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_is_max_context: 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 
83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True 201:True 202:True 203:True\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 190 3351 17 6628 1890 17 28488 13650 9151 212 982 22 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000004\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 4\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁did ▁the ▁frank ish ▁identity ▁emerge ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . 
[SEP]\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 10:0 11:1 12:1 13:1 14:2 15:2 16:2 17:2 18:2 19:3 20:3 21:3 22:3 23:3 24:3 25:4 26:4 27:5 28:5 29:5 30:5 31:6 32:6 33:6 34:7 35:7 36:7 37:8 38:9 39:10 40:11 41:12 42:13 43:14 44:14 45:15 46:16 47:16 48:17 49:18 50:19 51:20 52:21 53:22 54:22 55:22 56:22 57:23 58:24 59:25 60:26 61:26 62:26 63:26 64:27 65:28 66:29 67:30 68:31 69:31 70:32 71:32 72:32 73:32 74:32 75:32 76:33 77:34 78:35 79:35 80:35 81:35 82:35 83:35 84:35 85:36 86:36 87:37 88:38 89:39 90:40 91:40 92:40 93:40 94:41 95:41 96:42 97:43 98:43 99:44 100:44 101:45 102:46 103:47 104:48 105:48 106:48 107:49 108:50 109:51 110:52 111:52 112:52 113:53 114:54 115:55 116:55 117:55 118:56 119:56 120:57 121:58 122:59 123:59 124:59 125:59 126:60 127:61 128:62 129:63 130:64 131:65 132:66 133:67 134:68 135:69 136:69 137:70 138:71 139:71 140:71 141:71 142:71 143:71 144:72 145:72 146:73 147:74 148:75 149:76 150:77 151:78 152:79 153:80 154:80 155:80 156:80 157:80 158:81 159:82 160:83 161:84 162:84 163:84 164:84 165:85 166:86 167:87 168:88 169:89 170:90 171:91 172:92 173:93 174:93 175:93 176:94 177:95 178:96 179:97 180:98 181:99 182:100 183:101 184:102 185:102 186:103 187:103 188:104 189:105 190:106 191:107 192:108 193:109 194:110 195:111 196:112 197:112\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_is_max_context: 10:True 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_ids: 0 90 190 18 23675 1406 3643 7624 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 
7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000005\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 5\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁was ▁the ▁duke ▁in ▁the ▁battle ▁of ▁has ting s ? [SEP] ▁the ▁nor man ▁dynasty ▁had ▁a ▁major ▁political , ▁cultural ▁and ▁military ▁impact ▁on ▁medieval ▁euro pe ▁and ▁even ▁the ▁near ▁east . ▁the ▁nor man s ▁were ▁famed ▁for ▁their ▁martial ▁spirit ▁and ▁eventually ▁for ▁their ▁christian ▁pie ty , ▁becoming ▁ex ponent s ▁of ▁the ▁cat hol ic ▁or tho d oxy ▁into ▁which ▁they ▁assimilate d . ▁they ▁adopted ▁the ▁ gall o - rom ance ▁language ▁of ▁the ▁frank ish ▁land ▁they ▁settled , ▁their ▁dialect ▁becoming ▁known ▁as ▁nor man , ▁nor ma und ▁or ▁nor man ▁french , ▁an ▁important ▁literary ▁language . 
▁the ▁du chy ▁of ▁nor man dy , ▁which ▁they ▁formed ▁by ▁treaty ▁with ▁the ▁french ▁crown , ▁was ▁a ▁great ▁ fi ef ▁of ▁medieval ▁ franc e , ▁and ▁under ▁rich ard ▁ i ▁of ▁nor man dy ▁was ▁forged ▁into ▁a ▁cohesive ▁and ▁formidable ▁principal ity ▁in ▁feudal ▁tenure . ▁the ▁nor man s ▁are ▁noted ▁both ▁for ▁their ▁culture , ▁such ▁as ▁their ▁unique ▁ ro man esque ▁architecture ▁and ▁musical ▁traditions , ▁and ▁for ▁their ▁significant ▁military ▁accomplishments ▁and ▁innovations . ▁nor man ▁adventure rs ▁founded ▁the ▁kingdom ▁of ▁ s ici ly ▁under ▁ ro ger ▁ ii ▁after ▁con quer ing ▁southern ▁it aly ▁on ▁the ▁ s ara cen s ▁and ▁by zan tine s , ▁and ▁an ▁expedition ▁on ▁behalf ▁of ▁their ▁duke , ▁ william ▁the ▁con quer or , ▁led ▁to ▁the ▁nor man ▁conquest ▁of ▁ eng land ▁at ▁the ▁battle ▁of ▁has ting s ▁in ▁10 66 . ▁nor man ▁cultural ▁and ▁military ▁influence ▁spread ▁from ▁these ▁new ▁ european ▁centres ▁to ▁the ▁crusade r ▁states ▁of ▁the ▁near ▁east , ▁where ▁their ▁prince ▁ bo he mond ▁ i ▁founded ▁the ▁principal ity ▁of ▁anti och ▁in ▁the ▁ le vant , ▁to ▁ s cot land ▁and ▁ wal es ▁in ▁great ▁ bri tain , ▁to ▁ ire land , ▁and ▁to ▁the ▁coast s ▁of ▁north ▁a fri ca ▁and ▁the ▁can ary ▁islands . [SEP]\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 14:0 15:1 16:1 17:2 18:3 19:4 20:5 21:6 22:6 23:7 24:8 25:9 26:10 27:11 28:12 29:13 30:13 31:14 32:15 33:16 34:17 35:18 36:18 37:19 38:20 39:20 40:20 41:21 42:22 43:23 44:24 45:25 46:26 47:27 48:28 49:29 50:30 51:31 52:32 53:32 54:32 55:33 56:34 57:34 58:34 59:35 60:36 61:37 62:37 63:37 64:38 65:38 66:38 67:38 68:39 69:40 70:41 71:42 72:42 73:42 74:43 75:44 76:45 77:46 78:46 79:46 80:46 81:46 82:46 83:47 84:48 85:49 86:50 87:50 88:51 89:52 90:53 91:53 92:54 93:55 94:56 95:57 96:58 97:59 98:59 99:59 100:60 101:60 102:60 103:61 104:62 105:62 106:63 107:63 108:64 109:65 110:66 111:67 112:67 113:68 114:69 115:69 116:70 117:71 118:71 119:71 120:71 121:72 122:73 123:74 124:75 125:76 126:77 127:78 128:79 129:80 130:80 131:81 132:82 133:83 134:84 135:84 136:84 137:85 138:86 139:87 140:87 141:87 142:87 143:88 144:89 145:90 146:90 147:91 148:91 149:92 150:93 151:93 152:93 153:94 154:95 155:96 156:97 157:98 158:99 159:100 160:101 161:101 162:102 163:103 164:104 165:104 166:105 167:106 168:106 169:106 170:107 171:108 172:109 173:110 174:111 175:112 176:112 177:113 178:114 179:115 180:116 181:117 182:117 183:117 184:117 185:118 186:119 187:120 188:121 189:121 190:122 191:123 192:124 193:125 194:126 195:127 196:128 197:129 198:129 199:130 200:130 201:131 202:131 203:132 204:133 205:134 206:135 207:136 208:136 209:136 210:136 211:137 212:138 213:138 214:138 215:139 216:139 217:140 218:141 219:141 220:141 221:142 222:143 223:143 224:144 225:145 226:146 227:146 228:146 229:146 230:146 231:147 232:148 233:148 234:148 235:148 236:148 237:149 238:150 239:151 240:152 241:153 242:154 243:155 244:156 245:156 246:157 247:157 248:158 249:159 250:159 251:159 252:159 253:160 254:161 255:162 256:163 257:163 258:164 259:165 260:166 261:166 262:166 263:167 264:168 265:169 266:170 267:171 268:171 269:171 270:172 271:173 272:173 273:173 274:174 275:174 276:175 277:176 278:177 279:178 280:179 281:180 282:181 283:182 284:183 285:183 286:184 287:185 288:186 289:187 290:187 291:188 292:189 293:190 294:191 295:192 296:192 297:193 298:194 299:195 300:196 301:196 302:196 303:196 304:197 305:197 306:198 307:199 308:200 309:200 310:201 311:202 312:202 313:203 314:204 315:205 316:205 317:205 318:205 319:206 320:207 321:207 322:207 323:207 324:208 
325:209 326:209 327:209 328:210 329:211 330:212 331:212 332:212 333:212 334:213 335:214 336:214 337:214 338:214 339:215 340:216 341:217 342:218 343:218 344:219 345:220 346:221 347:221 348:221 349:222 350:223 351:224 352:224 353:225 354:225\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_is_max_context: 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True 201:True 202:True 203:True 204:True 205:True 206:True 207:True 208:True 209:True 210:True 211:True 212:True 213:True 214:True 215:True 216:True 217:True 218:True 219:True 220:True 221:True 222:True 223:True 224:True 225:True 226:True 227:True 228:True 229:True 230:True 231:True 232:True 233:True 234:True 235:True 236:True 237:True 238:True 239:True 240:True 241:True 242:True 243:True 244:True 245:True 246:True 247:True 248:True 249:True 250:True 251:True 252:True 253:True 254:True 255:True 256:True 257:True 258:True 259:True 260:True 261:True 262:True 263:True 264:True 265:True 266:True 267:True 268:True 269:True 270:True 271:True 272:True 273:True 274:True 275:True 276:True 277:True 278:True 279:True 280:True 281:True 282:True 283:True 284:True 285:True 286:True 287:True 288:True 289:True 290:True 291:True 292:True 293:True 294:True 295:True 296:True 297:True 298:True 299:True 300:True 301:True 302:True 303:True 304:True 305:True 306:True 307:True 308:True 309:True 310:True 311:True 312:True 313:True 314:True 315:True 316:True 317:True 318:True 319:True 320:True 321:True 322:True 323:True 324:True 325:True 326:True 327:True 328:True 329:True 330:True 331:True 332:True 333:True 334:True 335:True 336:True 337:True 338:True 339:True 340:True 341:True 342:True 343:True 344:True 345:True 346:True 347:True 348:True 349:True 350:True 351:True 352:True 353:True 354:True\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 30 18 25950 25 18 1727 20 51 1203 23 82 0 18 2387 249 12765 54 24 383 413 19 2518 21 370 1585 31 10146 2926 1590 21 176 18 479 1646 9 18 2387 249 23 55 17447 28 58 12656 
3424 21 1707 28 58 31747 11703 982 19 1939 2002 16821 23 20 18 4777 7439 556 49 6684 66 11285 91 59 63 30218 66 9 63 3135 18 17 12353 155 13 14182 1789 1243 20 18 23675 1406 883 63 3602 19 58 13424 1939 318 34 2387 249 19 2387 661 5587 49 2387 249 29183 19 48 400 6957 1243 9 18 4626 8358 20 2387 249 2087 19 59 63 1851 37 4816 33 18 29183 6923 19 30 24 312 17 2265 4631 20 10146 17 12786 93 19 21 168 2628 1896 17 150 20 2387 249 2087 30 17163 91 24 30133 21 17868 3824 769 25 28893 8709 9 18 2387 249 23 41 1699 207 28 58 1799 19 148 34 58 1779 17 986 249 17660 4797 21 2985 8991 19 21 28 58 1376 370 17877 21 19273 9 2387 249 6693 1114 2118 18 5975 20 17 23 9620 111 168 17 986 2371 17 8343 99 2147 10792 56 1335 36 12229 31 18 17 23 3068 9593 23 21 37 10280 10115 23 19 21 48 8553 31 4399 20 58 25950 19 17 31499 18 2147 10792 218 19 687 22 18 2387 249 18093 20 17 5618 729 38 18 1727 20 51 1203 23 25 241 4126 9 2387 249 2518 21 370 2204 1912 40 166 109 17 30707 13348 22 18 21018 213 1035 20 18 479 1646 19 131 58 8434 17 1238 1438 7778 17 150 2118 18 3824 769 20 932 6892 25 18 17 529 11226 19 22 17 23 12982 729 21 17 9760 202 25 312 17 5365 3766 19 22 17 5294 729 19 21 22 18 2372 23 20 1012 24 5994 1346 21 18 64 1449 5852 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - start_position: 246\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - end_position: 251\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - answer: ▁ william ▁the ▁con quer or\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000006\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 6\n", + "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - 
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000006
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 6
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁principal ity ▁did ▁ william ▁the ▁con quer er ▁found ? [SEP] ▁the ▁nor man ▁dynasty ▁had ▁a ▁major ▁political , … ▁and ▁the ▁can ary ▁islands . [SEP] (same passage as unique_id 1000000005)
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 14:0 15:1 16:1 … 354:225 (identical to unique_id 1000000005)
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_is_max_context: 14:True … 354:True (all True)
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 3824 769 190 17 31499 18 2147 10792 118 255 82 0 … (0-padded)
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 … 1 0 0 … 0
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 … 0 1 1 … 1 0 0 … 0
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - impossible example
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000007
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 7
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁branch ▁of ▁theoretical ▁computer ▁science ▁deals ▁with ▁broadly ▁classify ing ▁computational ▁problems ▁by ▁difficulty ▁and ▁class ▁of ▁relationship ? [SEP] ▁computational ▁complexity ▁theory ▁is ▁a ▁branch ▁of ▁the ▁theory ▁of ▁computation ▁in ▁theoretical ▁computer ▁science ▁that ▁focuses ▁on ▁classify ing ▁computational ▁problems ▁according ▁to ▁their ▁inherent ▁difficulty , ▁and ▁relating ▁those ▁classes ▁to ▁each ▁other . ▁a ▁computational ▁problem ▁is ▁understood ▁to ▁be ▁a ▁task ▁that ▁is ▁in ▁principle ▁a men able ▁to ▁being ▁solved ▁by ▁a ▁computer , ▁which ▁is ▁equivalent ▁to ▁stating ▁that ▁the ▁problem ▁may ▁be ▁solved ▁by ▁mechanical ▁application ▁of ▁mathematical ▁steps , ▁such ▁as ▁an ▁algorithm . [SEP]
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 22:0 23:1 24:2 25:3 26:4 … 103:73 (truncated)
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - token_is_max_context: 22:True … 103:True (all True)
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 3709 20 13109 920 1767 4108 33 16026 27871 56 23228 708 37 6157 21 1075 20 1498 82 0 23228 11906 2818 27 24 3709 … (0-padded)
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 … 1 0 0 … 0
07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 … 0 1 1 … 1 0 0 … 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - start_position: 22
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - end_position: 24
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - answer: ▁computational ▁complexity ▁theory
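start_position and end_position index into the tokens array, so the logged answer line can be reproduced by slicing that span and undoing the SentencePiece word-boundary marker ▁. A minimal check (decode_span is a hypothetical helper, not part of utils_squad):

    def decode_span(tokens, start, end):
        # Join the sub-token slice and turn "▁" word boundaries back into spaces.
        return "".join(tokens[start:end + 1]).replace("▁", " ").strip()

    # tokens[22:25] of unique_id 1000000007 above:
    span = ["▁computational", "▁complexity", "▁theory"]
    assert decode_span(span, 0, 2) == "computational complexity theory"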
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - *** Example ***
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - unique_id: 1000000008
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - example_index: 8
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - doc_span_index: 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁is ▁a ▁manual ▁application ▁of ▁mathematical ▁steps ? [SEP] ▁computational ▁complexity ▁theory ▁is ▁a ▁branch ▁of ▁the ▁theory ▁of ▁computation … ▁such ▁as ▁an ▁algorithm . [SEP] (same passage as unique_id 1000000007)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 … 92:73 (truncated)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True … 92:True (all True)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 27 24 6403 1479 20 12956 2094 82 0 23228 11906 2818 … (0-padded)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 … 1 0 0 … 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 … 0 1 1 … 1 0 0 … 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - impossible example
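The "impossible example" lines mark SQuAD 2.0-style unanswerable questions: "what is a manual application of mathematical steps?" has no answer in the passage, which describes a mechanical application. In the Hugging Face converter this notebook drives, such features are conventionally trained to point both span heads at the [CLS] token rather than a real span; the sketch below shows that convention with a hypothetical helper, not the verbatim utils_squad code.

    def target_span(start_position, end_position, is_impossible, cls_index=0):
        # Unanswerable questions collapse the target span onto [CLS]
        # (index 0 in these dumps) so the model can learn to abstain.
        if is_impossible:
            return cls_index, cls_index
        return start_position, end_position

    print(target_span(22, 24, is_impossible=False))     # (22, 24): real span
    print(target_span(None, None, is_impossible=True))  # (0, 0): no answer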
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - *** Example ***
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - unique_id: 1000000009
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - example_index: 9
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - doc_span_index: 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁measure ▁of ▁a ▁computational ▁problem ▁broadly ▁defines ▁the ▁inherent ▁difficulty ▁of ▁the ▁solution ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . [SEP]
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 17:0 18:1 19:2 20:3 21:4 … 140:103 (truncated)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 17:True … 140:True (all True)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2310 20 24 23228 662 16026 14668 18 16507 6157 20 18 1938 82 0 24 662 27 5520 … (0-padded)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 … 1 0 0 … 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 … 0 1 1 … 1 0 0 … 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - start_position: 24
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - end_position: 29
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - answer: ▁if ▁its ▁solution ▁requires ▁significant ▁resources
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - *** Example ***
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - unique_id: 1000000010
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - example_index: 10
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - doc_span_index: 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁method ▁is ▁used ▁to ▁in tu itive ly ▁assess ▁or ▁quantify ▁the ▁amount ▁of ▁resources ▁required ▁to ▁solve ▁a ▁computational ▁problem ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult … ▁cannot ▁do . [SEP] (same passage as unique_id 1000000009)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 25:0 26:1 27:2 … 148:103 (truncated)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 25:True … 148:True (all True)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2175 27 179 22 25 2853 9736 111 7329 49 30299 18 1065 20 1485 978 22 4929 24 23228 662 82 0 … (0-padded)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 … 1 0 0 … 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 … 0 1 1 … 1 0 0 … 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - start_position: 53
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - end_position: 56
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - answer: ▁mathematical ▁models ▁of ▁computation
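token_to_orig_map is what later lets write_predictions climb back from a predicted sub-token span to words of the original paragraph: each entry maps a position in tokens to the index of the whitespace-delimited word it came from. A sketch with illustrative names (the real code also re-aligns the words against the untokenized text, omitted here):

    def map_span_to_words(pred_start, pred_end, token_to_orig_map, doc_tokens):
        # Translate sub-token indices to original word indices, then join words.
        orig_start = token_to_orig_map[pred_start]
        orig_end = token_to_orig_map[pred_end]
        return " ".join(doc_tokens[orig_start:orig_end + 1])

    # From unique_id 1000000010 above: tokens 53..56 map to words 24..27.
    t2o = {53: 24, 54: 25, 55: 26, 56: 27}
    words = ["w%d" % i for i in range(24)] + ["mathematical", "models", "of", "computation"]
    print(map_span_to_words(53, 56, t2o, words))  # mathematical models of computation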
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - *** Example ***
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - unique_id: 1000000011
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - example_index: 11
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - doc_span_index: 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁are ▁two ▁basic ▁primary ▁resources ▁used ▁to ▁ gu age ▁complexity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult … ▁cannot ▁do . [SEP] (same passage as unique_id 1000000009)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:1 17:2 … 138:103 (truncated)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True … 138:True (all True)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 41 87 1949 1827 1485 179 22 17 3017 981 11906 82 0 … (0-padded)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 … 1 0 0 … 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 … 0 1 1 … 1 0 0 … 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - start_position: 65
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - end_position: 67
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - answer: ▁time ▁and ▁storage
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - *** Example ***
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - unique_id: 1000000012
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - example_index: 12
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - doc_span_index: 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁unit ▁is ▁measured ▁to ▁determine ▁circuit ▁simplicity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult … ▁cannot ▁do . [SEP] (same passage as unique_id 1000000009)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 … 134:103 (truncated)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True … 134:True (all True)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 1591 27 7375 22 2081 5034 18950 82 0 … (0-padded)
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 … 1 0 0 … 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 … 0 1 1 … 1 0 0 … 0
07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - impossible example
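token_is_max_context is uniformly True throughout this run because every paragraph fits in one doc span (doc_span_index: 0 everywhere). With longer paragraphs, convert_examples_to_features slides overlapping windows doc_stride tokens apart and marks, per token, the span that gives it the most surrounding context. A sketch of the windowing under assumed numbers (370 and 128 are illustrative, not this run's settings):

    import collections

    DocSpan = collections.namedtuple("DocSpan", ["start", "length"])

    def make_doc_spans(n_doc_tokens, max_tokens_for_doc, doc_stride):
        # Slide a max_tokens_for_doc window over the paragraph, doc_stride at a time.
        spans, start = [], 0
        while start < n_doc_tokens:
            length = min(n_doc_tokens - start, max_tokens_for_doc)
            spans.append(DocSpan(start, length))
            if start + length == n_doc_tokens:
                break
            start += doc_stride
        return spans

    # The ~341-sub-token Norman paragraph above needs a single span,
    # hence doc_span_index 0 and all-True token_is_max_context:
    print(make_doc_spans(341, 370, 128))  # [DocSpan(start=0, length=341)]
    print(make_doc_spans(800, 370, 128))  # five overlapping spans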
[SEP]\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7 18:8 19:9 20:10 21:11 22:12 23:12 24:13 25:14 26:15 27:16 28:16 29:17 30:18 31:19 32:19 33:20 34:21 35:21 36:22 37:23 38:24 39:25 40:26 41:27 42:28 43:29 44:30 45:31 46:32 47:33 48:33 49:34 50:35 51:36 52:37 53:38 54:39 55:40 56:41 57:41 58:42 59:43 60:44 61:45 62:46 63:46 64:47 65:48 66:49 67:50 68:51 69:52 70:52 71:53 72:54 73:55 74:56 75:57 76:58 77:59 78:59 79:59 80:60 81:61 82:62 83:62 84:62 85:63 86:64 87:65 88:66 89:67 90:68 91:69 92:70 93:70 94:70 95:71 96:72 97:73 98:73 99:74 100:75 101:76 102:77 103:78 104:79 105:79 106:79 107:80 108:81 109:82 110:82 111:82 112:83 113:84 114:85 115:86 116:87 117:88 118:89 119:90 120:91 121:92 122:93 123:94 124:95 125:96 126:97 127:98 128:99 129:100 130:101 131:102 132:103 133:103\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 10:True 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 243 27 179 25 30525 9848 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Saving features into cached file cached_train_xlnet-base-cased_384\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - ***** Running training *****\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Num examples = 14\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Num Epochs = 3\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Instantaneous batch size per GPU = 8\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Gradient Accumulation steps = 1\n", + "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Total optimization steps = 6\n", + "Epoch: 0%| | 0/3 [00:00\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexternals\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mjoblib\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mcdqa\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader_sklearn\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mReader\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m reader = Reader(train_file='train-v2.0.json',\n", - "\u001b[0;32m/content/cdQA/cdqa/reader/reader_sklearn.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtensorboardX\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mSummaryWriter\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 35\u001b[0;31m from pytorch_transformers import (WEIGHTS_NAME, BertConfig,\n\u001b[0m\u001b[1;32m 36\u001b[0m \u001b[0mBertForQuestionAnswering\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBertTokenizer\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[0mXLMConfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mXLMForQuestionAnswering\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pytorch_transformers'", - "", - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n" - ] + "output_type": "stream", + "text": [ + "total 546076\n", + "drwxr-xr-x 9 root root 4096 Jul 16 17:37 .\n", + "drwxr-xr-x 1 root root 4096 Jul 16 17:18 ..\n", + "-rw-r--r-- 1 root root 2 Jul 16 17:33 added_tokens.json\n", + "-rw-r--r-- 1 root root 815 Jul 16 17:18 api.py\n", + "-rw-r--r-- 1 root root 22 Jul 16 17:18 apt.txt\n", + "-rw-r--r-- 1 root root 101108 Jul 16 17:36 cached_dev_xlnet-base-cased_384\n", + "-rw-r--r-- 1 root root 101136 Jul 16 17:27 cached_train_xlnet-base-cased_384\n", + "drwxr-xr-x 7 root root 4096 Jul 16 17:19 cdqa\n", + "drwxr-xr-x 2 root root 4096 Jul 16 17:19 cdqa.egg-info\n", + "-rw-r--r-- 1 root root 641 Jul 16 17:33 config.json\n", + "-rw-r--r-- 1 root root 4854279 Jul 16 17:19 dev-v1.1.json\n", + "-rw-r--r-- 1 root root 4370528 Jul 16 17:19 dev-v2.0.json\n", + "-rw-r--r-- 1 root root 8786 Jul 16 17:24 dev-v2.0-small.json\n", + "-rw-r--r-- 1 root root 1452 Jul 16 17:18 download.py\n", + "drwxr-xr-x 2 root root 4096 Jul 16 17:18 examples\n", + "drwxr-xr-x 8 root root 4096 Jul 16 17:18 .git\n", + "drwxr-xr-x 3 root root 4096 Jul 16 
17:18 .github\n", + "-rw-r--r-- 1 root root 1375 Jul 16 17:18 .gitignore\n", + "-rw-r--r-- 1 root root 11356 Jul 16 17:18 LICENSE\n", + "-rw-r--r-- 1 root root 40 Jul 16 17:18 MANIFEST.in\n", + "-rw-r--r-- 1 root root 13572 Jul 16 17:37 nbest_predictions_.json\n", + "-rw-r--r-- 1 root root 759 Jul 16 17:37 null_odds_.json\n", + "-rw-r--r-- 1 root root 1212 Jul 16 17:37 predictions_.json\n", + "-rw-r--r-- 1 root root 476372095 Jul 16 17:33 pytorch_model.bin\n", + "-rw-r--r-- 1 root root 12855 Jul 16 17:18 README.md\n", + "-rw-r--r-- 1 root root 129 Jul 16 17:18 requirements.txt\n", + "drwxr-xr-x 4 root root 4096 Jul 16 17:27 runs\n", + "-rw-r--r-- 1 root root 727 Jul 16 17:18 setup.py\n", + "-rw-r--r-- 1 root root 202 Jul 16 17:33 special_tokens_map.json\n", + "-rw-r--r-- 1 root root 798011 Jul 16 17:33 spiece.model\n", + "drwxr-xr-x 2 root root 4096 Jul 16 17:18 tests\n", + "-rw-r--r-- 1 root root 1244 Jul 16 17:33 training_args.bin\n", + "-rw-r--r-- 1 root root 30288272 Jul 16 17:19 train-v1.1.json\n", + "-rw-r--r-- 1 root root 42123633 Jul 16 17:19 train-v2.0.json\n", + "-rw-r--r-- 1 root root 312 Jul 16 17:18 .travis.yml\n" + ], + "name": "stdout" } ] }, From ca246727d96be83afbec8ca0a8a66eac3d5639cc Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Wed, 17 Jul 2019 10:46:56 +0200 Subject: [PATCH 21/43] return final_prediction in predict() --- cdqa/reader/reader_sklearn.py | 68 +++++++++++++++++++++++++++++++++++ cdqa/reader/utils_squad.py | 15 +++++++- 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index 4145f762..0d7d5d3a 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -306,6 +306,72 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal return dataset +def predict(args, model, tokenizer, prefix=""): + dataset, examples, features = load_and_cache_examples(args, tokenizer, evaluate=True, output_examples=True) + + if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: + os.makedirs(args.output_dir) + + args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu) + # Note that DistributedSampler samples randomly + eval_sampler = SequentialSampler(dataset) if args.local_rank == -1 else DistributedSampler(dataset) + eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) + + # Eval! 
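+    # Collect the model's raw scores for every feature, keyed by unique_id so
+    # post-processing can map them back to text spans: BERT-style heads yield
+    # start/end logits (RawResult), while XLNet/XLM heads yield top-k start/end
+    # log-probs plus a CLS answerability logit (RawResultExtended).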
+ logger.info("***** Running evaluation {} *****".format(prefix)) + logger.info(" Num examples = %d", len(dataset)) + logger.info(" Batch size = %d", args.eval_batch_size) + all_results = [] + for batch in tqdm(eval_dataloader, desc="Evaluating"): + model.eval() + batch = tuple(t.to(args.device) for t in batch) + with torch.no_grad(): + inputs = {'input_ids': batch[0], + 'token_type_ids': None if args.model_type == 'xlm' else batch[1], # XLM don't use segment_ids + 'attention_mask': batch[2]} + example_indices = batch[3] + if args.model_type in ['xlnet', 'xlm']: + inputs.update({'cls_index': batch[4], + 'p_mask': batch[5]}) + outputs = model(**inputs) + + for i, example_index in enumerate(example_indices): + eval_feature = features[example_index.item()] + unique_id = int(eval_feature.unique_id) + if args.model_type in ['xlnet', 'xlm']: + # XLNet uses a more complex post-processing procedure + result = RawResultExtended(unique_id = unique_id, + start_top_log_probs = to_list(outputs[0][i]), + start_top_index = to_list(outputs[1][i]), + end_top_log_probs = to_list(outputs[2][i]), + end_top_index = to_list(outputs[3][i]), + cls_logits = to_list(outputs[4][i])) + else: + result = RawResult(unique_id = unique_id, + start_logits = to_list(outputs[0][i]), + end_logits = to_list(outputs[1][i])) + all_results.append(result) + + # Compute predictions + output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix)) + output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix)) + output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix)) + + if args.model_type in ['xlnet', 'xlm']: + # XLNet uses a more complex post-processing procedure + out_eval, final_prediction = write_predictions_extended(examples, features, all_results, args.n_best_size, + args.max_answer_length, output_prediction_file, + output_nbest_file, output_null_log_odds_file, args.predict_file, + model.config.start_n_top, model.config.end_n_top, + args.version_2_with_negative, tokenizer, args.verbose_logging) + else: + write_predictions(examples, features, all_results, args.n_best_size, + args.max_answer_length, args.do_lower_case, output_prediction_file, + output_nbest_file, output_null_log_odds_file, args.verbose_logging, + args.version_2_with_negative, args.null_score_diff_threshold) + + return out_eval, final_prediction + class Reader(BaseEstimator): """ """ @@ -513,4 +579,6 @@ def evaluate(self, X): def predict(self, X): + result = predict(self, self.model, self.tokenizer) + return '' diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py index 86e800d3..02445845 100644 --- a/cdqa/reader/utils_squad.py +++ b/cdqa/reader/utils_squad.py @@ -734,6 +734,7 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s all_predictions = collections.OrderedDict() all_nbest_json = collections.OrderedDict() scores_diff_json = collections.OrderedDict() + final_predictions = collections.OrderedDict() for (example_index, example) in enumerate(all_examples): features = example_index_to_features[example_index] @@ -867,6 +868,18 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s all_predictions[example.qas_id] = best_non_null_entry.text all_nbest_json[example.qas_id] = nbest_json + final_predictions[example.qas_id] = nbest_json[0] + + final_predictions_sorted = collections.OrderedDict(sorted(final_predictions.items(), + key=lambda item: item[1]['start_log_prob'] + + 
item[1]['end_log_prob'], + reverse=True)) + + question_id = list(final_predictions_sorted.items())[0][0] + title = [e for e in all_examples if e.qas_id == question_id][0].title + paragraph = [e for e in all_examples if e.qas_id == question_id][0].paragraph + + final_prediction = list(final_predictions_sorted.items())[0][1]['text'], title, paragraph with open(output_prediction_file, "w") as writer: writer.write(json.dumps(all_predictions, indent=4) + "\n") @@ -889,7 +902,7 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s find_all_best_thresh_v2(out_eval, all_predictions, exact_raw, f1_raw, scores_diff_json, qid_to_has_ans) - return out_eval + return out_eval, final_prediction def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): From d675c3b5ec0c2a084b4aa5ea15b9f7d18470b114 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Wed, 17 Jul 2019 11:39:46 +0200 Subject: [PATCH 22/43] debug --- cdqa/reader/reader_sklearn.py | 8 ++++---- cdqa/reader/utils_squad.py | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index 0d7d5d3a..14536ebb 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -272,14 +272,14 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal else: logger.info("Creating features from dataset file at %s", input_file) examples = read_squad_examples(input_file=input_file, - is_training=not evaluate, + is_training=not evaluate or not predict, version_2_with_negative=args.version_2_with_negative) features = convert_examples_to_features(examples=examples, tokenizer=tokenizer, max_seq_length=args.max_seq_length, doc_stride=args.doc_stride, max_query_length=args.max_query_length, - is_training=not evaluate) + is_training=not evaluate or not predict) if args.local_rank in [-1, 0]: logger.info("Saving features into cached file %s", cached_features_file) torch.save(features, cached_features_file) @@ -579,6 +579,6 @@ def evaluate(self, X): def predict(self, X): - result = predict(self, self.model, self.tokenizer) + out_eval, final_prediction = predict(self, self.model, self.tokenizer, prefix="") - return '' + return out_eval, final_prediction diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py index 02445845..490aa8cb 100644 --- a/cdqa/reader/utils_squad.py +++ b/cdqa/reader/utils_squad.py @@ -881,6 +881,8 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s final_prediction = list(final_predictions_sorted.items())[0][1]['text'], title, paragraph + print(final_prediction) + with open(output_prediction_file, "w") as writer: writer.write(json.dumps(all_predictions, indent=4) + "\n") From 67a6e46757b9def6472d5c6d8dcd563f1468df03 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Wed, 17 Jul 2019 12:29:17 +0200 Subject: [PATCH 23/43] continue debug --- cdqa/reader/reader_sklearn.py | 9 ++++++--- cdqa/reader/utils_squad.py | 2 -- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index 14536ebb..7c4426e2 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -262,24 +262,27 @@ def evaluate(args, model, tokenizer, prefix=""): def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False): # Load data features from cache or dataset file input_file = 
args.predict_file if evaluate else args.train_file - cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format( + try: + cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format( 'dev' if evaluate else 'train', list(filter(None, args.model_name_or_path.split('/'))).pop(), str(args.max_seq_length))) + except: + cached_features_file = '' if os.path.exists(cached_features_file) and not args.overwrite_cache and not output_examples: logger.info("Loading features from cached file %s", cached_features_file) features = torch.load(cached_features_file) else: logger.info("Creating features from dataset file at %s", input_file) examples = read_squad_examples(input_file=input_file, - is_training=not evaluate or not predict, + is_training=not evaluate, version_2_with_negative=args.version_2_with_negative) features = convert_examples_to_features(examples=examples, tokenizer=tokenizer, max_seq_length=args.max_seq_length, doc_stride=args.doc_stride, max_query_length=args.max_query_length, - is_training=not evaluate or not predict) + is_training=not evaluate) if args.local_rank in [-1, 0]: logger.info("Saving features into cached file %s", cached_features_file) torch.save(features, cached_features_file) diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py index 490aa8cb..02445845 100644 --- a/cdqa/reader/utils_squad.py +++ b/cdqa/reader/utils_squad.py @@ -881,8 +881,6 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s final_prediction = list(final_predictions_sorted.items())[0][1]['text'], title, paragraph - print(final_prediction) - with open(output_prediction_file, "w") as writer: writer.write(json.dumps(all_predictions, indent=4) + "\n") From c67739cadf9020825f4f7d84a44cf5264f2cc84e Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Wed, 17 Jul 2019 12:46:04 +0200 Subject: [PATCH 24/43] fix cached_features_file in predict mode --- cdqa/reader/reader_sklearn.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index 7c4426e2..7d7b96d6 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -262,13 +262,10 @@ def evaluate(args, model, tokenizer, prefix=""): def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False): # Load data features from cache or dataset file input_file = args.predict_file if evaluate else args.train_file - try: - cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format( - 'dev' if evaluate else 'train', - list(filter(None, args.model_name_or_path.split('/'))).pop(), - str(args.max_seq_length))) - except: - cached_features_file = '' + cached_features_file = os.path.join(os.path.dirname(input_file) if isinstance(input_file, str) else 'temp', 'cached_{}_{}_{}'.format( + 'dev' if evaluate else 'train', + list(filter(None, args.model_name_or_path.split('/'))).pop(), + str(args.max_seq_length))) if os.path.exists(cached_features_file) and not args.overwrite_cache and not output_examples: logger.info("Loading features from cached file %s", cached_features_file) features = torch.load(cached_features_file) From b236cf7dc0b5e8a70b2027a470f717479b42075a Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Wed, 17 Jul 2019 12:58:12 +0200 Subject: [PATCH 25/43] fix FileNotFoundError in torch.save() --- 
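Note on the one-line fix below: torch.save() opens its target with a plain
file write, so it raises FileNotFoundError when the parent directory does not
exist, and the 'temp' fallback directory is never created. Joining with ''
instead yields a bare relative file name, so the cache file lands in the
current working directory. A minimal sketch of the path behavior (the cache
file name is illustrative):

    import os

    # 'temp/' is never created, so torch.save() to this path fails
    print(os.path.join('temp', 'cached_train_xlnet-base-cased_384'))
    # -> temp/cached_train_xlnet-base-cased_384

    # joining with '' gives a bare file name written to the current directory
    print(os.path.join('', 'cached_train_xlnet-base-cased_384'))
    # -> cached_train_xlnet-base-cased_384
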
cdqa/reader/reader_sklearn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index 7d7b96d6..25a05652 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -262,7 +262,7 @@ def evaluate(args, model, tokenizer, prefix=""): def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False): # Load data features from cache or dataset file input_file = args.predict_file if evaluate else args.train_file - cached_features_file = os.path.join(os.path.dirname(input_file) if isinstance(input_file, str) else 'temp', 'cached_{}_{}_{}'.format( + cached_features_file = os.path.join(os.path.dirname(input_file) if isinstance(input_file, str) else '', 'cached_{}_{}_{}'.format( 'dev' if evaluate else 'train', list(filter(None, args.model_name_or_path.split('/'))).pop(), str(args.max_seq_length))) From e850e9fe0b38c10bfdb49c5a52c8aa7c59462427 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Wed, 17 Jul 2019 13:45:16 +0200 Subject: [PATCH 26/43] Fix TypeError() in write_predictions_extended() --- cdqa/reader/utils_squad.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py index 02445845..0b71044c 100644 --- a/cdqa/reader/utils_squad.py +++ b/cdqa/reader/utils_squad.py @@ -891,8 +891,11 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s with open(output_null_log_odds_file, "w") as writer: writer.write(json.dumps(scores_diff_json, indent=4) + "\n") - with open(orig_data_file, "r", encoding='utf-8') as reader: - orig_data = json.load(reader)["data"] + if isinstance(orig_data_file, str): + with open(orig_data_file, "r", encoding='utf-8') as reader: + orig_data = json.load(reader)["data"] + else: + orig_data = orig_data_file qid_to_has_ans = make_qid_to_has_ans(orig_data) has_ans_qids = [k for k, v in qid_to_has_ans.items() if v] From 860bdade3b47fa03f1572b616e6db09455f235d7 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Wed, 17 Jul 2019 13:46:36 +0200 Subject: [PATCH 27/43] update XLNet / SQuAD 2.0 test notebook --- examples/tutorial-train-xlnet-squad.ipynb | 1738 +++++++++++++++------ 1 file changed, 1225 insertions(+), 513 deletions(-) diff --git a/examples/tutorial-train-xlnet-squad.ipynb b/examples/tutorial-train-xlnet-squad.ipynb index b6d08ae7..50f1056d 100644 --- a/examples/tutorial-train-xlnet-squad.ipynb +++ b/examples/tutorial-train-xlnet-squad.ipynb @@ -31,7 +31,7 @@ "metadata": { "id": "zNtCqwveFjcK", "colab_type": "code", - "outputId": "5976e1e4-179d-463f-8271-2436e0d32a4e", + "outputId": "33631378-b050-40df-bcf0-c5601c003091", "colab": { "base_uri": "https://localhost:8080/", "height": 151 @@ -46,12 +46,12 @@ "output_type": "stream", "text": [ "Cloning into 'cdQA'...\n", - "remote: Enumerating objects: 100, done.\u001b[K\n", - "remote: Counting objects: 100% (100/100), done.\u001b[K\n", - "remote: Compressing objects: 100% (77/77), done.\u001b[K\n", - "remote: Total 896 (delta 60), reused 53 (delta 23), pack-reused 796\n", - "Receiving objects: 100% (896/896), 287.73 KiB | 805.00 KiB/s, done.\n", - "Resolving deltas: 100% (527/527), done.\n" + "remote: Enumerating objects: 131, done.\u001b[K\n", + "remote: Counting objects: 100% (131/131), done.\u001b[K\n", + "remote: Compressing objects: 100% (92/92), done.\u001b[K\n", + "remote: Total 927 (delta 85), reused 76 (delta 
39), pack-reused 796\u001b[K\n", + "Receiving objects: 100% (927/927), 319.29 KiB | 457.00 KiB/s, done.\n", + "Resolving deltas: 100% (552/552), done.\n" ], "name": "stdout" } @@ -77,7 +77,7 @@ "metadata": { "id": "5jBtSKczGF38", "colab_type": "code", - "outputId": "1ca2a098-6850-40cf-a86f-b18ef9a2d47f", + "outputId": "cd10460a-d2d9-4a8c-d2f9-85dc96012683", "colab": { "base_uri": "https://localhost:8080/", "height": 55 @@ -103,7 +103,7 @@ "metadata": { "id": "DHl2HUX1GRd6", "colab_type": "code", - "outputId": "6395a1be-09c2-4cb9-a431-35b19fac0e74", + "outputId": "1b8fe994-1a12-481c-b289-e0c3b605d4cf", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 @@ -126,13 +126,13 @@ "Requirement already satisfied: prettytable in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.7.2)\n", "Collecting pytorch_pretrained_bert (from cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d7/e0/c08d5553b89973d9a240605b9c12404bcf8227590de62bae27acbcfe076b/pytorch_pretrained_bert-0.6.2-py3-none-any.whl (123kB)\n", - "\u001b[K |████████████████████████████████| 133kB 4.2MB/s \n", + "\u001b[K |████████████████████████████████| 133kB 52.2MB/s \n", "\u001b[?25hCollecting pytorch-transformers (from cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/40/b5/2d78e74001af0152ee61d5ad4e290aec9a1e43925b21df2dc74ec100f1ab/pytorch_transformers-1.0.0-py3-none-any.whl (137kB)\n", - "\u001b[K |████████████████████████████████| 143kB 43.9MB/s \n", + "\u001b[K |████████████████████████████████| 143kB 61.0MB/s \n", "\u001b[?25hCollecting tensorboardX (from cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/c3/12/dcaf67e1312475b26db9e45e7bb6f32b540671a9ee120b3a72d9e09bc517/tensorboardX-1.8-py2.py3-none-any.whl (216kB)\n", - "\u001b[K |████████████████████████████████| 225kB 34.4MB/s \n", + "\u001b[K |████████████████████████████████| 225kB 63.6MB/s \n", "\u001b[?25hRequirement already satisfied: scikit_learn in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.21.2)\n", "Collecting tika (from cdqa==1.0.3)\n", " Downloading https://files.pythonhosted.org/packages/10/75/b566e446ffcf292f10c8d84c15a3d91615fe3d7ca8072a17c949d4e84b66/tika-1.19.tar.gz\n", @@ -141,31 +141,31 @@ " Downloading https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip\n", "Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (2.10.1)\n", "Requirement already satisfied: click>=5.1 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (7.0)\n", - "Requirement already satisfied: Werkzeug>=0.15 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (0.15.4)\n", "Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (1.1.0)\n", + "Requirement already satisfied: Werkzeug>=0.15 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (0.15.4)\n", "Requirement already satisfied: Six in /usr/local/lib/python3.6/dist-packages (from flask_cors->cdqa==1.0.3) (1.12.0)\n", + "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (1.16.4)\n", "Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (2018.9)\n", "Requirement already satisfied: python-dateutil>=2.5.0 in /usr/local/lib/python3.6/dist-packages (from 
pandas->cdqa==1.0.3) (2.5.3)\n", - "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (1.16.4)\n", + "Requirement already satisfied: torch>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.1.0)\n", "Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.9.185)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (2.21.0)\n", "Collecting regex (from pytorch_pretrained_bert->cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/6f/4e/1b178c38c9a1a184288f72065a65ca01f3154df43c6ad898624149b8b4e0/regex-2019.06.08.tar.gz (651kB)\n", - "\u001b[K |████████████████████████████████| 655kB 29.0MB/s \n", - "\u001b[?25hRequirement already satisfied: torch>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.1.0)\n", - "Collecting sentencepiece (from pytorch-transformers->cdqa==1.0.3)\n", + "\u001b[K |████████████████████████████████| 655kB 49.7MB/s \n", + "\u001b[?25hCollecting sentencepiece (from pytorch-transformers->cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/00/95/7f357995d5eb1131aa2092096dca14a6fc1b1d2860bd99c22a612e1d1019/sentencepiece-0.1.82-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\n", - "\u001b[K |████████████████████████████████| 1.0MB 33.5MB/s \n", + "\u001b[K |████████████████████████████████| 1.0MB 51.9MB/s \n", "\u001b[?25hRequirement already satisfied: protobuf>=3.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorboardX->cdqa==1.0.3) (3.7.1)\n", "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.6/dist-packages (from scikit_learn->cdqa==1.0.3) (1.3.0)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from tika->cdqa==1.0.3) (41.0.1)\n", "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->Flask->cdqa==1.0.3) (1.1.1)\n", "Requirement already satisfied: s3transfer<0.3.0,>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.2.1)\n", - "Requirement already satisfied: botocore<1.13.0,>=1.12.185 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (1.12.185)\n", "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.9.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2019.6.16)\n", - "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2.8)\n", + "Requirement already satisfied: botocore<1.13.0,>=1.12.185 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (1.12.185)\n", "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (1.24.3)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2.8)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from 
requests->pytorch_pretrained_bert->cdqa==1.0.3) (2019.6.16)\n", "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (3.0.4)\n", "Requirement already satisfied: docutils>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.13.0,>=1.12.185->boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.14)\n", "Building wheels for collected packages: tika, wget, regex\n", @@ -212,7 +212,7 @@ "metadata": { "id": "50r4anYBITRO", "colab_type": "code", - "outputId": "c2891b00-1f52-412c-9377-7c2b2dd18510", + "outputId": "4231c98c-4d17-49e0-cd60-cd18af562124", "colab": { "base_uri": "https://localhost:8080/", "height": 457 @@ -227,28 +227,28 @@ "output_type": "stream", "text": [ "total 79828\n", - "drwxr-xr-x 8 root root 4096 Jul 16 17:19 .\n", - "drwxr-xr-x 1 root root 4096 Jul 16 17:18 ..\n", - "-rw-r--r-- 1 root root 815 Jul 16 17:18 api.py\n", - "-rw-r--r-- 1 root root 22 Jul 16 17:18 apt.txt\n", - "drwxr-xr-x 6 root root 4096 Jul 16 17:18 cdqa\n", - "drwxr-xr-x 2 root root 4096 Jul 16 17:19 cdqa.egg-info\n", - "-rw-r--r-- 1 root root 4854279 Jul 16 17:19 dev-v1.1.json\n", - "-rw-r--r-- 1 root root 4370528 Jul 16 17:19 dev-v2.0.json\n", - "-rw-r--r-- 1 root root 1452 Jul 16 17:18 download.py\n", - "drwxr-xr-x 2 root root 4096 Jul 16 17:18 examples\n", - "drwxr-xr-x 8 root root 4096 Jul 16 17:18 .git\n", - "drwxr-xr-x 3 root root 4096 Jul 16 17:18 .github\n", - "-rw-r--r-- 1 root root 1375 Jul 16 17:18 .gitignore\n", - "-rw-r--r-- 1 root root 11356 Jul 16 17:18 LICENSE\n", - "-rw-r--r-- 1 root root 40 Jul 16 17:18 MANIFEST.in\n", - "-rw-r--r-- 1 root root 12855 Jul 16 17:18 README.md\n", - "-rw-r--r-- 1 root root 129 Jul 16 17:18 requirements.txt\n", - "-rw-r--r-- 1 root root 727 Jul 16 17:18 setup.py\n", - "drwxr-xr-x 2 root root 4096 Jul 16 17:18 tests\n", - "-rw-r--r-- 1 root root 30288272 Jul 16 17:19 train-v1.1.json\n", - "-rw-r--r-- 1 root root 42123633 Jul 16 17:19 train-v2.0.json\n", - "-rw-r--r-- 1 root root 312 Jul 16 17:18 .travis.yml\n" + "drwxr-xr-x 8 root root 4096 Jul 17 11:30 .\n", + "drwxr-xr-x 1 root root 4096 Jul 17 11:30 ..\n", + "-rw-r--r-- 1 root root 815 Jul 17 11:30 api.py\n", + "-rw-r--r-- 1 root root 22 Jul 17 11:30 apt.txt\n", + "drwxr-xr-x 6 root root 4096 Jul 17 11:30 cdqa\n", + "drwxr-xr-x 2 root root 4096 Jul 17 11:30 cdqa.egg-info\n", + "-rw-r--r-- 1 root root 4854279 Jul 17 11:30 dev-v1.1.json\n", + "-rw-r--r-- 1 root root 4370528 Jul 17 11:30 dev-v2.0.json\n", + "-rw-r--r-- 1 root root 1452 Jul 17 11:30 download.py\n", + "drwxr-xr-x 2 root root 4096 Jul 17 11:30 examples\n", + "drwxr-xr-x 8 root root 4096 Jul 17 11:30 .git\n", + "drwxr-xr-x 3 root root 4096 Jul 17 11:30 .github\n", + "-rw-r--r-- 1 root root 1375 Jul 17 11:30 .gitignore\n", + "-rw-r--r-- 1 root root 11356 Jul 17 11:30 LICENSE\n", + "-rw-r--r-- 1 root root 40 Jul 17 11:30 MANIFEST.in\n", + "-rw-r--r-- 1 root root 12855 Jul 17 11:30 README.md\n", + "-rw-r--r-- 1 root root 129 Jul 17 11:30 requirements.txt\n", + "-rw-r--r-- 1 root root 727 Jul 17 11:30 setup.py\n", + "drwxr-xr-x 2 root root 4096 Jul 17 11:30 tests\n", + "-rw-r--r-- 1 root root 30288272 Jul 17 11:30 train-v1.1.json\n", + "-rw-r--r-- 1 root root 42123633 Jul 17 11:30 train-v2.0.json\n", + "-rw-r--r-- 1 root root 312 Jul 17 11:30 .travis.yml\n" ], "name": "stdout" } @@ -263,7 +263,7 @@ }, "id": "umJkmO9HFf3L", "colab_type": "code", - "outputId": "2ae27930-f375-40f1-da10-3e2e75c3c416", + "outputId": "bafedcb2-6bf2-4282-8898-485e13622922", 
"colab": { "base_uri": "https://localhost:8080/", "height": 75 @@ -292,21 +292,21 @@ "metadata": { "id": "ylorIsqLz_J3", "colab_type": "code", + "outputId": "fb597f7d-ba49-4bc9-826b-1cd855c399ed", "colab": { "base_uri": "https://localhost:8080/", "height": 247 - }, - "outputId": "90dcdf41-69fe-4527-b0fc-c7c66b5ebb21" + } }, "source": [ "!wget https://raw.githubusercontent.com/huggingface/pytorch-transformers/master/examples/tests_samples/SQUAD/dev-v2.0-small.json" ], - "execution_count": 10, + "execution_count": 8, "outputs": [ { "output_type": "stream", "text": [ - "--2019-07-16 17:24:04-- https://raw.githubusercontent.com/huggingface/pytorch-transformers/master/examples/tests_samples/SQUAD/dev-v2.0-small.json\n", + "--2019-07-17 11:31:03-- https://raw.githubusercontent.com/huggingface/pytorch-transformers/master/examples/tests_samples/SQUAD/dev-v2.0-small.json\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", @@ -315,7 +315,7 @@ "\n", "\rdev-v2.0-small.json 0%[ ] 0 --.-KB/s \rdev-v2.0-small.json 100%[===================>] 8.58K --.-KB/s in 0s \n", "\n", - "2019-07-16 17:24:04 (126 MB/s) - ‘dev-v2.0-small.json’ saved [8786/8786]\n", + "2019-07-17 11:31:03 (166 MB/s) - ‘dev-v2.0-small.json’ saved [8786/8786]\n", "\n" ], "name": "stdout" @@ -325,26 +325,177 @@ { "cell_type": "code", "metadata": { - "id": "9501kcG30SGd", + "id": "sMRDXXFdaO7z", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "ec2ee20c-76ae-495b-b3df-81eb61db52c1" }, "source": [ - "# !pip install apex" + "!cat dev-v2.0-small.json" ], - "execution_count": 0, - "outputs": [] + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "{\n", + " \"version\": \"v2.0\",\n", + " \"data\": [{\n", + " \"title\": \"Normans\",\n", + " \"paragraphs\": [{\n", + " \"qas\": [{\n", + " \"question\": \"In what country is Normandy located?\",\n", + " \"id\": \"56ddde6b9a695914005b9628\",\n", + " \"answers\": [{\n", + " \"text\": \"France\",\n", + " \"answer_start\": 159\n", + " }],\n", + " \"is_impossible\": false\n", + " }, {\n", + " \"question\": \"When were the Normans in Normandy?\",\n", + " \"id\": \"56ddde6b9a695914005b9629\",\n", + " \"answers\": [{\n", + " \"text\": \"10th and 11th centuries\",\n", + " \"answer_start\": 94\n", + " }],\n", + " \"is_impossible\": false\n", + " }, {\n", + " \"question\": \"From which countries did the Norse originate?\",\n", + " \"id\": \"56ddde6b9a695914005b962a\",\n", + " \"answers\": [{\n", + " \"text\": \"Denmark, Iceland and Norway\",\n", + " \"answer_start\": 256\n", + " }],\n", + " \"is_impossible\": false\n", + " }, {\n", + " \"plausible_answers\": [{\n", + " \"text\": \"Rollo\",\n", + " \"answer_start\": 308\n", + " }],\n", + " \"question\": \"Who did King Charles III swear fealty to?\",\n", + " \"id\": \"5ad39d53604f3c001a3fe8d3\",\n", + " \"answers\": [],\n", + " \"is_impossible\": true\n", + " }, {\n", + " \"plausible_answers\": [{\n", + " \"text\": \"10th century\",\n", + " \"answer_start\": 671\n", + " }],\n", + " \"question\": \"When did the Frankish identity emerge?\",\n", + " \"id\": \"5ad39d53604f3c001a3fe8d4\",\n", + " \"answers\": [],\n", + " \"is_impossible\": true\n", + " }],\n", + " \"context\": \"The Normans (Norman: Nourmands; French: Normands; 
Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse (\\\"Norman\\\" comes from \\\"Norseman\\\") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.\"\n", + " }, {\n", + " \"qas\": [{\n", + " \"question\": \"Who was the duke in the battle of Hastings?\",\n", + " \"id\": \"56dddf4066d3e219004dad5f\",\n", + " \"answers\": [{\n", + " \"text\": \"William the Conqueror\",\n", + " \"answer_start\": 1022\n", + " }],\n", + " \"is_impossible\": false\n", + " }, {\n", + " \"plausible_answers\": [{\n", + " \"text\": \"Antioch\",\n", + " \"answer_start\": 1295\n", + " }],\n", + " \"question\": \"What principality did William the conquerer found?\",\n", + " \"id\": \"5ad3a266604f3c001a3fea2b\",\n", + " \"answers\": [],\n", + " \"is_impossible\": true\n", + " }],\n", + " \"context\": \"The Norman dynasty had a major political, cultural and military impact on medieval Europe and even the Near East. The Normans were famed for their martial spirit and eventually for their Christian piety, becoming exponents of the Catholic orthodoxy into which they assimilated. They adopted the Gallo-Romance language of the Frankish land they settled, their dialect becoming known as Norman, Normaund or Norman French, an important literary language. The Duchy of Normandy, which they formed by treaty with the French crown, was a great fief of medieval France, and under Richard I of Normandy was forged into a cohesive and formidable principality in feudal tenure. The Normans are noted both for their culture, such as their unique Romanesque architecture and musical traditions, and for their significant military accomplishments and innovations. Norman adventurers founded the Kingdom of Sicily under Roger II after conquering southern Italy on the Saracens and Byzantines, and an expedition on behalf of their duke, William the Conqueror, led to the Norman conquest of England at the Battle of Hastings in 1066. 
Norman cultural and military influence spread from these new European centres to the Crusader states of the Near East, where their prince Bohemond I founded the Principality of Antioch in the Levant, to Scotland and Wales in Great Britain, to Ireland, and to the coasts of north Africa and the Canary Islands.\"\n", + " }]\n", + " }, {\n", + " \"title\": \"Computational_complexity_theory\",\n", + " \"paragraphs\": [{\n", + " \"qas\": [{\n", + " \"question\": \"What branch of theoretical computer science deals with broadly classifying computational problems by difficulty and class of relationship?\",\n", + " \"id\": \"56e16182e3433e1400422e28\",\n", + " \"answers\": [{\n", + " \"text\": \"Computational complexity theory\",\n", + " \"answer_start\": 0\n", + " }],\n", + " \"is_impossible\": false\n", + " }, {\n", + " \"plausible_answers\": [{\n", + " \"text\": \"algorithm\",\n", + " \"answer_start\": 472\n", + " }],\n", + " \"question\": \"What is a manual application of mathematical steps?\",\n", + " \"id\": \"5ad5316b5b96ef001a10ab76\",\n", + " \"answers\": [],\n", + " \"is_impossible\": true\n", + " }],\n", + " \"context\": \"Computational complexity theory is a branch of the theory of computation in theoretical computer science that focuses on classifying computational problems according to their inherent difficulty, and relating those classes to each other. A computational problem is understood to be a task that is in principle amenable to being solved by a computer, which is equivalent to stating that the problem may be solved by mechanical application of mathematical steps, such as an algorithm.\"\n", + " }, {\n", + " \"qas\": [{\n", + " \"question\": \"What measure of a computational problem broadly defines the inherent difficulty of the solution?\",\n", + " \"id\": \"56e16839cd28a01900c67887\",\n", + " \"answers\": [{\n", + " \"text\": \"if its solution requires significant resources\",\n", + " \"answer_start\": 46\n", + " }],\n", + " \"is_impossible\": false\n", + " }, {\n", + " \"question\": \"What method is used to intuitively assess or quantify the amount of resources required to solve a computational problem?\",\n", + " \"id\": \"56e16839cd28a01900c67888\",\n", + " \"answers\": [{\n", + " \"text\": \"mathematical models of computation\",\n", + " \"answer_start\": 176\n", + " }],\n", + " \"is_impossible\": false\n", + " }, {\n", + " \"question\": \"What are two basic primary resources used to guage complexity?\",\n", + " \"id\": \"56e16839cd28a01900c67889\",\n", + " \"answers\": [{\n", + " \"text\": \"time and storage\",\n", + " \"answer_start\": 305\n", + " }],\n", + " \"is_impossible\": false\n", + " }, {\n", + " \"plausible_answers\": [{\n", + " \"text\": \"the number of gates in a circuit\",\n", + " \"answer_start\": 436\n", + " }],\n", + " \"question\": \"What unit is measured to determine circuit simplicity?\",\n", + " \"id\": \"5ad532575b96ef001a10ab7f\",\n", + " \"answers\": [],\n", + " \"is_impossible\": true\n", + " }, {\n", + " \"plausible_answers\": [{\n", + " \"text\": \"the number of processors\",\n", + " \"answer_start\": 502\n", + " }],\n", + " \"question\": \"What number is used in perpendicular computing?\",\n", + " \"id\": \"5ad532575b96ef001a10ab80\",\n", + " \"answers\": [],\n", + " \"is_impossible\": true\n", + " }],\n", + " \"context\": \"A problem is regarded as inherently difficult if its solution requires significant resources, whatever the algorithm used. 
The theory formalizes this intuition, by introducing mathematical models of computation to study these problems and quantifying the amount of resources needed to solve them, such as time and storage. Other complexity measures are also used, such as the amount of communication (used in communication complexity), the number of gates in a circuit (used in circuit complexity) and the number of processors (used in parallel computing). One of the roles of computational complexity theory is to determine the practical limits on what computers can and cannot do.\"\n", + " }]\n", + " }]\n", + "}" + ], + "name": "stdout" + } + ] }, { "cell_type": "code", "metadata": { "id": "owyoli60qGb9", "colab_type": "code", + "outputId": "5551e4b9-7556-49cd-83ca-f112a0aadc0d", "colab": { "base_uri": "https://localhost:8080/", - "height": 935 - }, - "outputId": "cc99edb0-f821-4afc-c347-3413f746dfa9" + "height": 1000 + } }, "source": [ "reader = Reader(train_file='dev-v2.0-small.json',\n", @@ -354,14 +505,19 @@ " fp16=False,\n", " output_dir='.')" ], - "execution_count": 15, + "execution_count": 10, "outputs": [ { "output_type": "stream", "text": [ - "07/16/2019 17:27:20 - WARNING - cdqa.reader.reader_sklearn - Process rank: -1, device: cpu, n_gpu: 1, distributed training: False, 16-bits training: False\n", - "07/16/2019 17:27:20 - INFO - pytorch_transformers.modeling_utils - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json from cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", - "07/16/2019 17:27:20 - INFO - pytorch_transformers.modeling_utils - Model config {\n", + "07/17/2019 11:31:18 - WARNING - cdqa.reader.reader_sklearn - Process rank: -1, device: cpu, n_gpu: 1, distributed training: False, 16-bits training: False\n", + "07/17/2019 11:31:19 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json not found in cache, downloading to /tmp/tmpe6r5d8ur\n", + "100%|██████████| 641/641 [00:00<00:00, 123844.90B/s]\n", + "07/17/2019 11:31:19 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpe6r5d8ur to cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/17/2019 11:31:19 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/17/2019 11:31:19 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmpe6r5d8ur\n", + "07/17/2019 11:31:19 - INFO - pytorch_transformers.modeling_utils - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json from cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/17/2019 11:31:19 - INFO - pytorch_transformers.modeling_utils - Model config {\n", " \"attn_type\": \"bi\",\n", " \"bi_data\": false,\n", " \"clamp_len\": -1,\n", @@ -392,11 +548,21 @@ " \"untie_r\": true\n", "}\n", "\n", - "07/16/2019 17:27:21 - INFO - pytorch_transformers.tokenization_utils - loading file 
https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", - "07/16/2019 17:27:22 - INFO - pytorch_transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin from cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", - "07/16/2019 17:27:26 - INFO - pytorch_transformers.modeling_utils - Weights of XLNetForQuestionAnswering not initialized from pretrained model: ['start_logits.dense.weight', 'start_logits.dense.bias', 'end_logits.dense_0.weight', 'end_logits.dense_0.bias', 'end_logits.LayerNorm.weight', 'end_logits.LayerNorm.bias', 'end_logits.dense_1.weight', 'end_logits.dense_1.bias', 'answer_class.dense_0.weight', 'answer_class.dense_0.bias', 'answer_class.dense_1.weight']\n", - "07/16/2019 17:27:26 - INFO - pytorch_transformers.modeling_utils - Weights from pretrained model not used in XLNetForQuestionAnswering: ['lm_loss.weight', 'lm_loss.bias']\n", - "07/16/2019 17:27:26 - INFO - cdqa.reader.reader_sklearn - Training/evaluation parameters Reader(adam_epsilon=1e-08, cache_dir='', config_name='', do_eval=None,\n", + "07/17/2019 11:31:20 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model not found in cache, downloading to /tmp/tmpn1g639tv\n", + "100%|██████████| 798011/798011 [00:01<00:00, 597312.57B/s]\n", + "07/17/2019 11:31:23 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpn1g639tv to cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/17/2019 11:31:23 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/17/2019 11:31:23 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmpn1g639tv\n", + "07/17/2019 11:31:23 - INFO - pytorch_transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/17/2019 11:31:24 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin not found in cache, downloading to /tmp/tmpcusodobh\n", + "100%|██████████| 467042463/467042463 [00:46<00:00, 10091679.98B/s]\n", + "07/17/2019 11:32:11 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpcusodobh to cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", + "07/17/2019 11:32:13 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", 
+ "07/17/2019 11:32:13 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmpcusodobh\n", + "07/17/2019 11:32:13 - INFO - pytorch_transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin from cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", + "07/17/2019 11:32:18 - INFO - pytorch_transformers.modeling_utils - Weights of XLNetForQuestionAnswering not initialized from pretrained model: ['start_logits.dense.weight', 'start_logits.dense.bias', 'end_logits.dense_0.weight', 'end_logits.dense_0.bias', 'end_logits.LayerNorm.weight', 'end_logits.LayerNorm.bias', 'end_logits.dense_1.weight', 'end_logits.dense_1.bias', 'answer_class.dense_0.weight', 'answer_class.dense_0.bias', 'answer_class.dense_1.weight']\n", + "07/17/2019 11:32:18 - INFO - pytorch_transformers.modeling_utils - Weights from pretrained model not used in XLNetForQuestionAnswering: ['lm_loss.weight', 'lm_loss.bias']\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Training/evaluation parameters Reader(adam_epsilon=1e-08, cache_dir='', config_name='', do_eval=None,\n", " do_lower_case=True, do_train=None, doc_stride=128,\n", " eval_all_checkpoints=True, evaluate_during_training=True, fp16=False,\n", " fp16_opt_level='O1', gradient_accumulation_steps=1, learning_rate=5e-05,\n", @@ -416,214 +582,214 @@ "metadata": { "id": "24eT2nuKtrqp", "colab_type": "code", + "outputId": "f15bc136-52d0-40c0-c013-c97a4623a804", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 - }, - "outputId": "9f64f34d-4782-4af3-832d-a892a7decd43" + } }, "source": [ "reader.fit(X='')" ], - "execution_count": 16, + "execution_count": 11, "outputs": [ { "output_type": "stream", "text": [ - "07/16/2019 17:27:26 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at dev-v2.0-small.json\n", - "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n", - "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 0\n", - "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁in ▁what ▁country ▁is ▁nor man dy ▁located ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . 
[SEP]\n",
- "[... token_to_orig_map / token_is_max_context / input_ids / input_mask / segment_ids logs omitted ...]\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - start_position: 61\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - end_position: 63\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - answer: ▁ franc e\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000001\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 1\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁were ▁the ▁nor man s ▁in ▁nor man dy ? [SEP] [... context tokens omitted ...]\n",
- "[... token_to_orig_map / token_is_max_context / input_ids / input_mask / segment_ids logs omitted ...]\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - start_position: 46\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - end_position: 51\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - answer: ▁10 th ▁and ▁11 th ▁centuries\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000002\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 2\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁from ▁which ▁countries ▁did ▁the ▁nor se ▁originate ? [SEP] [... context tokens omitted ...]\n",
- "[... token_to_orig_map / token_is_max_context / input_ids / input_mask / segment_ids logs omitted ...]\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - start_position: 91\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - end_position: 99\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - answer: ▁ den mark , ▁ice land ▁and ▁nor way\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000003\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 3\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁did ▁king ▁ char les ▁ iii ▁swear ▁fe al ty ▁to ? [SEP] [... context tokens omitted ...]\n",
- "[... token_to_orig_map / token_is_max_context / input_ids / input_mask / segment_ids logs omitted ...]\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - impossible example\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000004\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 4\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁did ▁the ▁frank ish ▁identity ▁emerge ? [SEP] [... context tokens omitted ...]\n",
- "[... token_to_orig_map / token_is_max_context / input_ids / input_mask / segment_ids logs omitted ...]\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - impossible example\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000005\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 5\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁was ▁the ▁duke ▁in ▁the ▁battle ▁of ▁has ting s ? [SEP] [... context tokens omitted ...]\n",
- "[... token_to_orig_map / token_is_max_context / input_ids / input_mask / segment_ids logs omitted ...]\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - start_position: 246\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - end_position: 251\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - answer: ▁ william ▁the ▁con quer or\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000006\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 6\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁principal ity ▁did ▁ william ▁the ▁con quer er ▁found ? [SEP] [... context tokens omitted ...]\n",
- "[... token_to_orig_map / token_is_max_context / input_ids / input_mask / segment_ids logs omitted ...]\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - impossible example\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - unique_id: 1000000007\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - example_index: 7\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/16/2019 17:27:26 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁branch ▁of ▁theoretical ▁computer ▁science ▁deals ▁with ▁broadly ▁classify ing ▁computational ▁problems ▁by ▁difficulty ▁and ▁class ▁of ▁relationship ? [SEP] [... context tokens omitted ...]\n",
- "[... token_to_orig_map / token_is_max_context / input_ids / input_mask / segment_ids logs omitted ...]\n",
- "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - start_position: 22\n",
- "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - end_position: 24\n",
- "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - answer: ▁computational ▁complexity ▁theory\n",
- "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - unique_id: 1000000008\n",
- "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - example_index: 8\n",
- "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁is ▁a ▁manual ▁application ▁of ▁mathematical ▁steps ? [SEP] ▁computational ▁complexity ▁theory ▁is ▁a ▁branch ▁of ▁the ▁theory ▁of ▁computation ▁in ▁theoretical ▁computer ▁science ▁that ▁focuses ▁on ▁classify ing ▁computational ▁problems ▁according ▁to ▁their ▁inherent ▁difficulty , ▁and ▁relating ▁those ▁classes ▁to ▁each ▁other . ▁a ▁computational ▁problem ▁is ▁understood ▁to ▁be ▁a ▁task ▁that ▁is ▁in ▁principle ▁a men able ▁to ▁being ▁solved ▁by ▁a ▁computer , ▁which ▁is ▁equivalent ▁to ▁stating ▁that ▁the ▁problem ▁may ▁be ▁solved ▁by ▁mechanical ▁application ▁of ▁mathematical ▁steps , ▁such ▁as ▁an ▁algorithm . 
[SEP]\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:13 25:14 26:15 27:16 28:17 29:18 30:18 31:19 32:20 33:21 34:22 35:23 36:24 37:25 38:25 39:26 40:27 41:28 42:29 43:30 44:31 45:32 46:32 47:33 48:34 49:35 50:36 51:37 52:38 53:39 54:40 55:41 56:42 57:43 58:44 59:45 60:46 61:46 62:46 63:47 64:48 65:49 66:50 67:51 68:52 69:52 70:53 71:54 72:55 73:56 74:57 75:58 76:59 77:60 78:61 79:62 80:63 81:64 82:65 83:66 84:67 85:68 86:69 87:69 88:70 89:71 90:72 91:73 92:73\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 27 24 6403 1479 20 12956 2094 82 0 23228 11906 2818 27 24 3709 20 18 2818 20 27686 25 13109 920 1767 29 7712 31 27871 56 23228 708 549 22 58 16507 6157 19 21 7376 186 2814 22 231 86 9 24 23228 662 27 4950 22 39 24 2578 29 27 25 4926 24 1126 386 22 163 12567 37 24 920 19 59 27 4682 22 8033 29 18 662 132 39 12567 37 7820 1479 20 12956 2094 19 148 34 48 13301 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - impossible example\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - unique_id: 1000000009\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - example_index: 9\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁measure ▁of ▁a ▁computational ▁problem ▁broadly ▁defines ▁the ▁inherent ▁difficulty ▁of ▁the ▁solution ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 17:0 18:1 19:2 20:3 21:4 22:5 23:6 24:7 25:8 26:9 27:10 28:11 29:12 30:12 31:13 32:14 33:15 34:16 35:16 36:17 37:18 38:19 39:19 40:20 41:21 42:21 43:22 44:23 45:24 46:25 47:26 48:27 49:28 50:29 51:30 52:31 53:32 54:33 55:33 56:34 57:35 58:36 59:37 60:38 61:39 62:40 63:41 64:41 65:42 66:43 67:44 68:45 69:46 70:46 71:47 72:48 73:49 74:50 75:51 76:52 77:52 78:53 79:54 80:55 81:56 82:57 83:58 84:59 85:59 86:59 87:60 88:61 89:62 90:62 91:62 92:63 93:64 94:65 95:66 96:67 97:68 98:69 99:70 100:70 101:70 102:71 103:72 104:73 105:73 106:74 107:75 108:76 109:77 110:78 111:79 112:79 113:79 114:80 115:81 116:82 117:82 118:82 119:83 120:84 121:85 122:86 123:87 124:88 125:89 126:90 127:91 128:92 129:93 130:94 131:95 132:96 133:97 134:98 135:99 136:100 137:101 138:102 139:103 140:103\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2310 20 24 23228 662 16026 14668 18 16507 6157 20 18 1938 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - start_position: 24\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - end_position: 29\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - answer: ▁if ▁its ▁solution ▁requires ▁significant ▁resources\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - unique_id: 1000000010\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - example_index: 10\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁method ▁is ▁used ▁to ▁in tu itive ly ▁assess ▁or ▁quantify ▁the ▁amount ▁of ▁resources ▁required ▁to ▁solve ▁a ▁computational ▁problem ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 25:0 26:1 27:2 28:3 29:4 30:5 31:6 32:7 33:8 34:9 35:10 36:11 37:12 38:12 39:13 40:14 41:15 42:16 43:16 44:17 45:18 46:19 47:19 48:20 49:21 50:21 51:22 52:23 53:24 54:25 55:26 56:27 57:28 58:29 59:30 60:31 61:32 62:33 63:33 64:34 65:35 66:36 67:37 68:38 69:39 70:40 71:41 72:41 73:42 74:43 75:44 76:45 77:46 78:46 79:47 80:48 81:49 82:50 83:51 84:52 85:52 86:53 87:54 88:55 89:56 90:57 91:58 92:59 93:59 94:59 95:60 96:61 97:62 98:62 99:62 100:63 101:64 102:65 103:66 104:67 105:68 106:69 107:70 108:70 109:70 110:71 111:72 112:73 113:73 114:74 115:75 116:76 117:77 118:78 119:79 120:79 121:79 122:80 123:81 124:82 125:82 126:82 127:83 128:84 129:85 130:86 131:87 132:88 133:89 134:90 135:91 136:92 137:93 138:94 139:95 140:96 141:97 142:98 143:99 144:100 145:101 146:102 147:103 148:103\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2175 27 179 22 25 2853 9736 111 7329 49 30299 18 1065 20 1485 978 22 4929 24 23228 662 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - start_position: 53\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - end_position: 56\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - answer: ▁mathematical ▁models ▁of ▁computation\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - unique_id: 1000000011\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - example_index: 11\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁are ▁two ▁basic ▁primary ▁resources ▁used ▁to ▁ gu age ▁complexity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:1 17:2 18:3 19:4 20:5 21:6 22:7 23:8 24:9 25:10 26:11 27:12 28:12 29:13 30:14 31:15 32:16 33:16 34:17 35:18 36:19 37:19 38:20 39:21 40:21 41:22 42:23 43:24 44:25 45:26 46:27 47:28 48:29 49:30 50:31 51:32 52:33 53:33 54:34 55:35 56:36 57:37 58:38 59:39 60:40 61:41 62:41 63:42 64:43 65:44 66:45 67:46 68:46 69:47 70:48 71:49 72:50 73:51 74:52 75:52 76:53 77:54 78:55 79:56 80:57 81:58 82:59 83:59 84:59 85:60 86:61 87:62 88:62 89:62 90:63 91:64 92:65 93:66 94:67 95:68 96:69 97:70 98:70 99:70 100:71 101:72 102:73 103:73 104:74 105:75 106:76 107:77 108:78 109:79 110:79 111:79 112:80 113:81 114:82 115:82 116:82 117:83 118:84 119:85 120:86 121:87 122:88 123:89 124:90 125:91 126:92 127:93 128:94 129:95 130:96 131:97 132:98 133:99 134:100 135:101 136:102 137:103 138:103\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 41 87 1949 1827 1485 179 22 17 3017 981 11906 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - start_position: 65\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - end_position: 67\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - answer: ▁time ▁and ▁storage\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - unique_id: 1000000012\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - example_index: 12\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁unit ▁is ▁measured ▁to ▁determine ▁circuit ▁simplicity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:12 25:13 26:14 27:15 28:16 29:16 30:17 31:18 32:19 33:19 34:20 35:21 36:21 37:22 38:23 39:24 40:25 41:26 42:27 43:28 44:29 45:30 46:31 47:32 48:33 49:33 50:34 51:35 52:36 53:37 54:38 55:39 56:40 57:41 58:41 59:42 60:43 61:44 62:45 63:46 64:46 65:47 66:48 67:49 68:50 69:51 70:52 71:52 72:53 73:54 74:55 75:56 76:57 77:58 78:59 79:59 80:59 81:60 82:61 83:62 84:62 85:62 86:63 87:64 88:65 89:66 90:67 91:68 92:69 93:70 94:70 95:70 96:71 97:72 98:73 99:73 100:74 101:75 102:76 103:77 104:78 105:79 106:79 107:79 108:80 109:81 110:82 111:82 112:82 113:83 114:84 115:85 116:86 117:87 118:88 119:89 120:90 121:91 122:92 123:93 124:94 125:95 126:96 127:97 128:98 129:99 130:100 131:101 132:102 133:103 134:103\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 1591 27 7375 22 2081 5034 18950 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - impossible example\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - unique_id: 1000000013\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - example_index: 13\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁number ▁is ▁used ▁in ▁perpendicular ▁computing ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7 18:8 19:9 20:10 21:11 22:12 23:12 24:13 25:14 26:15 27:16 28:16 29:17 30:18 31:19 32:19 33:20 34:21 35:21 36:22 37:23 38:24 39:25 40:26 41:27 42:28 43:29 44:30 45:31 46:32 47:33 48:33 49:34 50:35 51:36 52:37 53:38 54:39 55:40 56:41 57:41 58:42 59:43 60:44 61:45 62:46 63:46 64:47 65:48 66:49 67:50 68:51 69:52 70:52 71:53 72:54 73:55 74:56 75:57 76:58 77:59 78:59 79:59 80:60 81:61 82:62 83:62 84:62 85:63 86:64 87:65 88:66 89:67 90:68 91:69 92:70 93:70 94:70 95:71 96:72 97:73 98:73 99:74 100:75 101:76 102:77 103:78 104:79 105:79 106:79 107:80 108:81 109:82 110:82 111:82 112:83 113:84 114:85 115:86 116:87 117:88 118:89 119:90 120:91 121:92 122:93 123:94 124:95 125:96 126:97 127:98 128:99 129:100 130:101 131:102 132:103 133:103\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - token_is_max_context: 10:True 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 243 27 179 25 30525 9848 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/16/2019 17:27:27 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
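Each of the condensed dumps above follows the same layout: `input_ids` packs `[CLS]` + question + `[SEP]` + context + `[SEP]` and is zero-padded out to `max_seq_length` (384 here); `input_mask` is 1 over real tokens and 0 over padding; `segment_ids` is 0 over the `[CLS]`+question+`[SEP]` prefix and 1 over the context span. A minimal sketch of that packing, with `pack_features` as a hypothetical stand-in for (not a copy of) `convert_examples_to_features` in `utils_squad.py`; the id constants just mirror this particular log, where `[CLS]`, `[SEP]` and padding all happen to map to 0:

```python
from typing import List, Tuple

def pack_features(question_ids: List[int], context_ids: List[int],
                  max_seq_length: int = 384,
                  cls_id: int = 0, sep_id: int = 0, pad_id: int = 0
                  ) -> Tuple[List[int], List[int], List[int]]:
    """Hypothetical sketch of the feature layout visible in the logs:
    [CLS] question [SEP] context [SEP] <pad> ...
    (doc-span truncation of long contexts is handled elsewhere)."""
    input_ids = [cls_id] + question_ids + [sep_id] + context_ids + [sep_id]
    # segment 0 covers [CLS] + question + [SEP]; segment 1 covers context + [SEP]
    segment_ids = [0] * (len(question_ids) + 2) + [1] * (len(context_ids) + 1)
    input_mask = [1] * len(input_ids)          # 1 marks a real token
    padding = max_seq_length - len(input_ids)
    input_ids += [pad_id] * padding            # logs show zero-padding
    input_mask += [0] * padding                # 0 marks a padding position
    segment_ids += [0] * padding               # padding stays in segment 0, as logged
    assert len(input_ids) == len(input_mask) == len(segment_ids) == max_seq_length
    return input_ids, input_mask, segment_ids
```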
- "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Saving features into cached file cached_train_xlnet-base-cased_384\n",
    - "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - ***** Running training *****\n",
    - "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Num examples = 14\n",
    - "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Num Epochs = 3\n",
    - "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Instantaneous batch size per GPU = 8\n",
    - "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Total train batch size (w. parallel, distributed & accumulation) = 8\n",
    - "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Gradient Accumulation steps = 1\n",
    - "07/16/2019 17:27:27 - INFO - cdqa.reader.reader_sklearn - Total optimization steps = 6\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at dev-v2.0-small.json\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 0\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁in ▁what ▁country ▁is ▁nor man dy ▁located ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ... [SEP]\n",
    + "      [token_to_orig_map, token_is_max_context, input_ids, input_mask and segment_ids dumps omitted]\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - start_position: 61\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - end_position: 63\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - answer: ▁ franc e\n",
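The removed training-setup lines above end with "Total optimization steps = 6", which is consistent with the other logged values. A worked check of that arithmetic, assuming the usual ceil-division dataloader length; the variable names are illustrative, not necessarily those used in `reader_sklearn.py`:

```python
import math

# Values copied from the training-setup log lines above.
num_examples = 14
per_gpu_train_batch_size = 8
n_gpu = 1
num_train_epochs = 3
gradient_accumulation_steps = 1

# ceil(14 / 8) = 2 batches per epoch; 2 // 1 * 3 = 6 optimization steps.
train_batch_size = per_gpu_train_batch_size * max(1, n_gpu)
batches_per_epoch = math.ceil(num_examples / train_batch_size)
t_total = batches_per_epoch // gradient_accumulation_steps * num_train_epochs
print(t_total)  # 6, matching "Total optimization steps = 6"
```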
+ "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000001\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 1\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁were ▁the ▁nor man s ▁in ▁nor man dy ? [SEP] ▁the ▁nor man s ... [same passage as above] [SEP]\n",
    + "      [feature dumps omitted]\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - start_position: 46\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - end_position: 51\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - answer: ▁10 th ▁and ▁11 th ▁centuries\n",
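`start_position` and `end_position` index into the sub-token sequence, and the logged answer is simply that slice of `tokens`; turning SentencePiece pieces back into a readable string only requires handling the "▁" word-boundary marker. A toy illustration using the span logged just above (the real pipeline instead maps predictions back to the original text via `token_to_orig_map` and `write_predictions`):

```python
def span_to_text(tokens, start, end):
    """Toy recovery of a logged answer span from SentencePiece tokens."""
    pieces = tokens[start:end + 1]
    # "\u2581" ("▁") marks a word boundary in SentencePiece output.
    return "".join(pieces).replace("\u2581", " ").strip()

# Example 1000000001 above logs positions 46..51 within the full token list;
# the same slice is reproduced here directly.
tokens = ["▁10", "th", "▁and", "▁11", "th", "▁centuries"]
print(span_to_text(tokens, 0, 5))  # "10th and 11th centuries"
```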
+ "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000002\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 2\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁from ▁which ▁countries ▁did ▁the ▁nor se ▁originate ? [SEP] ▁the ▁nor man s ... [same passage as above] [SEP]\n",
    + "      [feature dumps omitted]\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - start_position: 91\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - end_position: 99\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - answer: ▁ den mark , ▁ice land ▁and ▁nor way\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000003\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 3\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁did ▁king ▁ char les ▁ iii ▁swear ▁fe al ty ▁to ? [SEP] ▁the ▁nor man s ... [same passage as above] [SEP]\n",
    + "      [token_to_orig_map dump omitted]\n",
    + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 16:True 17:True ... [values omitted] ... 81:True 82:True 
83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True 201:True 202:True 203:True\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 190 3351 17 6628 1890 17 28488 13650 9151 212 982 22 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000004\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 4\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁did ▁the ▁frank ish ▁identity ▁emerge ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . 
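The `tokens` and `token_to_orig_map` records in this output come from the feature-conversion step in `cdqa.reader.utils_squad` (`convert_examples_to_features`): each whitespace-delimited document token is re-tokenized into sub-tokens, and an index map is kept so that predicted sub-token spans can later be projected back onto the original words. A condensed sketch of that bookkeeping (illustrative only; the full function also handles the query, special tokens and windowing):

```python
def build_token_maps(doc_tokens, tokenizer):
    """Re-tokenize whitespace tokens into sub-tokens, keeping index maps.

    tok_to_orig_index yields the `subtoken:original` pairs logged as
    token_to_orig_map above; orig_to_tok_index is the reverse lookup
    used when translating answer spans.
    """
    tok_to_orig_index = []   # sub-token position -> original token position
    orig_to_tok_index = []   # original token position -> its first sub-token
    all_doc_tokens = []
    for i, token in enumerate(doc_tokens):
        orig_to_tok_index.append(len(all_doc_tokens))
        for sub_token in tokenizer.tokenize(token):
            tok_to_orig_index.append(i)
            all_doc_tokens.append(sub_token)
    return all_doc_tokens, tok_to_orig_index, orig_to_tok_index
```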
[SEP]\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 10:0 11:1 12:1 13:1 14:2 15:2 16:2 17:2 18:2 19:3 20:3 21:3 22:3 23:3 24:3 25:4 26:4 27:5 28:5 29:5 30:5 31:6 32:6 33:6 34:7 35:7 36:7 37:8 38:9 39:10 40:11 41:12 42:13 43:14 44:14 45:15 46:16 47:16 48:17 49:18 50:19 51:20 52:21 53:22 54:22 55:22 56:22 57:23 58:24 59:25 60:26 61:26 62:26 63:26 64:27 65:28 66:29 67:30 68:31 69:31 70:32 71:32 72:32 73:32 74:32 75:32 76:33 77:34 78:35 79:35 80:35 81:35 82:35 83:35 84:35 85:36 86:36 87:37 88:38 89:39 90:40 91:40 92:40 93:40 94:41 95:41 96:42 97:43 98:43 99:44 100:44 101:45 102:46 103:47 104:48 105:48 106:48 107:49 108:50 109:51 110:52 111:52 112:52 113:53 114:54 115:55 116:55 117:55 118:56 119:56 120:57 121:58 122:59 123:59 124:59 125:59 126:60 127:61 128:62 129:63 130:64 131:65 132:66 133:67 134:68 135:69 136:69 137:70 138:71 139:71 140:71 141:71 142:71 143:71 144:72 145:72 146:73 147:74 148:75 149:76 150:77 151:78 152:79 153:80 154:80 155:80 156:80 157:80 158:81 159:82 160:83 161:84 162:84 163:84 164:84 165:85 166:86 167:87 168:88 169:89 170:90 171:91 172:92 173:93 174:93 175:93 176:94 177:95 178:96 179:97 180:98 181:99 182:100 183:101 184:102 185:102 186:103 187:103 188:104 189:105 190:106 191:107 192:108 193:109 194:110 195:111 196:112 197:112\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 10:True 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 90 190 18 23675 1406 3643 7624 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 
7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000005\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 5\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁was ▁the ▁duke ▁in ▁the ▁battle ▁of ▁has ting s ? [SEP] ▁the ▁nor man ▁dynasty ▁had ▁a ▁major ▁political , ▁cultural ▁and ▁military ▁impact ▁on ▁medieval ▁euro pe ▁and ▁even ▁the ▁near ▁east . ▁the ▁nor man s ▁were ▁famed ▁for ▁their ▁martial ▁spirit ▁and ▁eventually ▁for ▁their ▁christian ▁pie ty , ▁becoming ▁ex ponent s ▁of ▁the ▁cat hol ic ▁or tho d oxy ▁into ▁which ▁they ▁assimilate d . ▁they ▁adopted ▁the ▁ gall o - rom ance ▁language ▁of ▁the ▁frank ish ▁land ▁they ▁settled , ▁their ▁dialect ▁becoming ▁known ▁as ▁nor man , ▁nor ma und ▁or ▁nor man ▁french , ▁an ▁important ▁literary ▁language . 
▁the ▁du chy ▁of ▁nor man dy , ▁which ▁they ▁formed ▁by ▁treaty ▁with ▁the ▁french ▁crown , ▁was ▁a ▁great ▁ fi ef ▁of ▁medieval ▁ franc e , ▁and ▁under ▁rich ard ▁ i ▁of ▁nor man dy ▁was ▁forged ▁into ▁a ▁cohesive ▁and ▁formidable ▁principal ity ▁in ▁feudal ▁tenure . ▁the ▁nor man s ▁are ▁noted ▁both ▁for ▁their ▁culture , ▁such ▁as ▁their ▁unique ▁ ro man esque ▁architecture ▁and ▁musical ▁traditions , ▁and ▁for ▁their ▁significant ▁military ▁accomplishments ▁and ▁innovations . ▁nor man ▁adventure rs ▁founded ▁the ▁kingdom ▁of ▁ s ici ly ▁under ▁ ro ger ▁ ii ▁after ▁con quer ing ▁southern ▁it aly ▁on ▁the ▁ s ara cen s ▁and ▁by zan tine s , ▁and ▁an ▁expedition ▁on ▁behalf ▁of ▁their ▁duke , ▁ william ▁the ▁con quer or , ▁led ▁to ▁the ▁nor man ▁conquest ▁of ▁ eng land ▁at ▁the ▁battle ▁of ▁has ting s ▁in ▁10 66 . ▁nor man ▁cultural ▁and ▁military ▁influence ▁spread ▁from ▁these ▁new ▁ european ▁centres ▁to ▁the ▁crusade r ▁states ▁of ▁the ▁near ▁east , ▁where ▁their ▁prince ▁ bo he mond ▁ i ▁founded ▁the ▁principal ity ▁of ▁anti och ▁in ▁the ▁ le vant , ▁to ▁ s cot land ▁and ▁ wal es ▁in ▁great ▁ bri tain , ▁to ▁ ire land , ▁and ▁to ▁the ▁coast s ▁of ▁north ▁a fri ca ▁and ▁the ▁can ary ▁islands . [SEP]\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 14:0 15:1 16:1 17:2 18:3 19:4 20:5 21:6 22:6 23:7 24:8 25:9 26:10 27:11 28:12 29:13 30:13 31:14 32:15 33:16 34:17 35:18 36:18 37:19 38:20 39:20 40:20 41:21 42:22 43:23 44:24 45:25 46:26 47:27 48:28 49:29 50:30 51:31 52:32 53:32 54:32 55:33 56:34 57:34 58:34 59:35 60:36 61:37 62:37 63:37 64:38 65:38 66:38 67:38 68:39 69:40 70:41 71:42 72:42 73:42 74:43 75:44 76:45 77:46 78:46 79:46 80:46 81:46 82:46 83:47 84:48 85:49 86:50 87:50 88:51 89:52 90:53 91:53 92:54 93:55 94:56 95:57 96:58 97:59 98:59 99:59 100:60 101:60 102:60 103:61 104:62 105:62 106:63 107:63 108:64 109:65 110:66 111:67 112:67 113:68 114:69 115:69 116:70 117:71 118:71 119:71 120:71 121:72 122:73 123:74 124:75 125:76 126:77 127:78 128:79 129:80 130:80 131:81 132:82 133:83 134:84 135:84 136:84 137:85 138:86 139:87 140:87 141:87 142:87 143:88 144:89 145:90 146:90 147:91 148:91 149:92 150:93 151:93 152:93 153:94 154:95 155:96 156:97 157:98 158:99 159:100 160:101 161:101 162:102 163:103 164:104 165:104 166:105 167:106 168:106 169:106 170:107 171:108 172:109 173:110 174:111 175:112 176:112 177:113 178:114 179:115 180:116 181:117 182:117 183:117 184:117 185:118 186:119 187:120 188:121 189:121 190:122 191:123 192:124 193:125 194:126 195:127 196:128 197:129 198:129 199:130 200:130 201:131 202:131 203:132 204:133 205:134 206:135 207:136 208:136 209:136 210:136 211:137 212:138 213:138 214:138 215:139 216:139 217:140 218:141 219:141 220:141 221:142 222:143 223:143 224:144 225:145 226:146 227:146 228:146 229:146 230:146 231:147 232:148 233:148 234:148 235:148 236:148 237:149 238:150 239:151 240:152 241:153 242:154 243:155 244:156 245:156 246:157 247:157 248:158 249:159 250:159 251:159 252:159 253:160 254:161 255:162 256:163 257:163 258:164 259:165 260:166 261:166 262:166 263:167 264:168 265:169 266:170 267:171 268:171 269:171 270:172 271:173 272:173 273:173 274:174 275:174 276:175 277:176 278:177 279:178 280:179 281:180 282:181 283:182 284:183 285:183 286:184 287:185 288:186 289:187 290:187 291:188 292:189 293:190 294:191 295:192 296:192 297:193 298:194 299:195 300:196 301:196 302:196 303:196 304:197 305:197 306:198 307:199 308:200 309:200 310:201 311:202 312:202 313:203 314:204 315:205 316:205 317:205 318:205 319:206 320:207 321:207 322:207 323:207 324:208 
325:209 326:209 327:209 328:210 329:211 330:212 331:212 332:212 333:212 334:213 335:214 336:214 337:214 338:214 339:215 340:216 341:217 342:218 343:218 344:219 345:220 346:221 347:221 348:221 349:222 350:223 351:224 352:224 353:225 354:225\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True 201:True 202:True 203:True 204:True 205:True 206:True 207:True 208:True 209:True 210:True 211:True 212:True 213:True 214:True 215:True 216:True 217:True 218:True 219:True 220:True 221:True 222:True 223:True 224:True 225:True 226:True 227:True 228:True 229:True 230:True 231:True 232:True 233:True 234:True 235:True 236:True 237:True 238:True 239:True 240:True 241:True 242:True 243:True 244:True 245:True 246:True 247:True 248:True 249:True 250:True 251:True 252:True 253:True 254:True 255:True 256:True 257:True 258:True 259:True 260:True 261:True 262:True 263:True 264:True 265:True 266:True 267:True 268:True 269:True 270:True 271:True 272:True 273:True 274:True 275:True 276:True 277:True 278:True 279:True 280:True 281:True 282:True 283:True 284:True 285:True 286:True 287:True 288:True 289:True 290:True 291:True 292:True 293:True 294:True 295:True 296:True 297:True 298:True 299:True 300:True 301:True 302:True 303:True 304:True 305:True 306:True 307:True 308:True 309:True 310:True 311:True 312:True 313:True 314:True 315:True 316:True 317:True 318:True 319:True 320:True 321:True 322:True 323:True 324:True 325:True 326:True 327:True 328:True 329:True 330:True 331:True 332:True 333:True 334:True 335:True 336:True 337:True 338:True 339:True 340:True 341:True 342:True 343:True 344:True 345:True 346:True 347:True 348:True 349:True 350:True 351:True 352:True 353:True 354:True\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 30 18 25950 25 18 1727 20 51 1203 23 82 0 18 2387 249 12765 54 24 383 413 19 2518 21 370 1585 31 10146 2926 1590 21 176 18 479 1646 9 18 2387 249 23 55 17447 28 58 12656 
3424 21 1707 28 58 31747 11703 982 19 1939 2002 16821 23 20 18 4777 7439 556 49 6684 66 11285 91 59 63 30218 66 9 63 3135 18 17 12353 155 13 14182 1789 1243 20 18 23675 1406 883 63 3602 19 58 13424 1939 318 34 2387 249 19 2387 661 5587 49 2387 249 29183 19 48 400 6957 1243 9 18 4626 8358 20 2387 249 2087 19 59 63 1851 37 4816 33 18 29183 6923 19 30 24 312 17 2265 4631 20 10146 17 12786 93 19 21 168 2628 1896 17 150 20 2387 249 2087 30 17163 91 24 30133 21 17868 3824 769 25 28893 8709 9 18 2387 249 23 41 1699 207 28 58 1799 19 148 34 58 1779 17 986 249 17660 4797 21 2985 8991 19 21 28 58 1376 370 17877 21 19273 9 2387 249 6693 1114 2118 18 5975 20 17 23 9620 111 168 17 986 2371 17 8343 99 2147 10792 56 1335 36 12229 31 18 17 23 3068 9593 23 21 37 10280 10115 23 19 21 48 8553 31 4399 20 58 25950 19 17 31499 18 2147 10792 218 19 687 22 18 2387 249 18093 20 17 5618 729 38 18 1727 20 51 1203 23 25 241 4126 9 2387 249 2518 21 370 2204 1912 40 166 109 17 30707 13348 22 18 21018 213 1035 20 18 479 1646 19 131 58 8434 17 1238 1438 7778 17 150 2118 18 3824 769 20 932 6892 25 18 17 529 11226 19 22 17 23 12982 729 21 17 9760 202 25 312 17 5365 3766 19 22 17 5294 729 19 21 22 18 2372 23 20 1012 24 5994 1346 21 18 64 1449 5852 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - start_position: 246\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - end_position: 251\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - answer: ▁ william ▁the ▁con quer or\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000006\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 6\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - 
doc_span_index: 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁principal ity ▁did ▁ william ▁the ▁con quer er ▁found ? [SEP] ▁the ▁nor man ▁dynasty ▁had ▁a ▁major ▁political , ▁cultural ▁and ▁military ▁impact ▁on ▁medieval ▁euro pe ▁and ▁even ▁the ▁near ▁east . ▁the ▁nor man s ▁were ▁famed ▁for ▁their ▁martial ▁spirit ▁and ▁eventually ▁for ▁their ▁christian ▁pie ty , ▁becoming ▁ex ponent s ▁of ▁the ▁cat hol ic ▁or tho d oxy ▁into ▁which ▁they ▁assimilate d . ▁they ▁adopted ▁the ▁ gall o - rom ance ▁language ▁of ▁the ▁frank ish ▁land ▁they ▁settled , ▁their ▁dialect ▁becoming ▁known ▁as ▁nor man , ▁nor ma und ▁or ▁nor man ▁french , ▁an ▁important ▁literary ▁language . ▁the ▁du chy ▁of ▁nor man dy , ▁which ▁they ▁formed ▁by ▁treaty ▁with ▁the ▁french ▁crown , ▁was ▁a ▁great ▁ fi ef ▁of ▁medieval ▁ franc e , ▁and ▁under ▁rich ard ▁ i ▁of ▁nor man dy ▁was ▁forged ▁into ▁a ▁cohesive ▁and ▁formidable ▁principal ity ▁in ▁feudal ▁tenure . ▁the ▁nor man s ▁are ▁noted ▁both ▁for ▁their ▁culture , ▁such ▁as ▁their ▁unique ▁ ro man esque ▁architecture ▁and ▁musical ▁traditions , ▁and ▁for ▁their ▁significant ▁military ▁accomplishments ▁and ▁innovations . ▁nor man ▁adventure rs ▁founded ▁the ▁kingdom ▁of ▁ s ici ly ▁under ▁ ro ger ▁ ii ▁after ▁con quer ing ▁southern ▁it aly ▁on ▁the ▁ s ara cen s ▁and ▁by zan tine s , ▁and ▁an ▁expedition ▁on ▁behalf ▁of ▁their ▁duke , ▁ william ▁the ▁con quer or , ▁led ▁to ▁the ▁nor man ▁conquest ▁of ▁ eng land ▁at ▁the ▁battle ▁of ▁has ting s ▁in ▁10 66 . ▁nor man ▁cultural ▁and ▁military ▁influence ▁spread ▁from ▁these ▁new ▁ european ▁centres ▁to ▁the ▁crusade r ▁states ▁of ▁the ▁near ▁east , ▁where ▁their ▁prince ▁ bo he mond ▁ i ▁founded ▁the ▁principal ity ▁of ▁anti och ▁in ▁the ▁ le vant , ▁to ▁ s cot land ▁and ▁ wal es ▁in ▁great ▁ bri tain , ▁to ▁ ire land , ▁and ▁to ▁the ▁coast s ▁of ▁north ▁a fri ca ▁and ▁the ▁can ary ▁islands . 
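`doc_span_index` is 0 for every feature in this run because each paragraph fits inside a single window of `max_seq_length` tokens; longer paragraphs are split into overlapping windows `doc_stride` tokens apart, and each sub-token is flagged `True` only in the span where it has the most surrounding context. A condensed sketch of that scoring rule (the `token_is_max_context` records above are its output):

```python
import collections

# One window over the sub-tokenized paragraph.
DocSpan = collections.namedtuple("DocSpan", ["start", "length"])

def check_is_max_context(doc_spans, cur_span_index, position):
    """Return True if `position` has its maximal context in this span.

    A token's score in a span is min(left context, right context), plus a
    small bonus for longer spans; predictions for the token are only kept
    from its best-scoring span.
    """
    best_score, best_span_index = None, None
    for span_index, doc_span in enumerate(doc_spans):
        end = doc_span.start + doc_span.length - 1
        if position < doc_span.start or position > end:
            continue
        num_left_context = position - doc_span.start
        num_right_context = end - position
        score = min(num_left_context, num_right_context) + 0.01 * doc_span.length
        if best_score is None or score > best_score:
            best_score, best_span_index = score, span_index
    return cur_span_index == best_span_index
```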
[SEP]\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 14:0 15:1 16:1 17:2 18:3 19:4 20:5 21:6 22:6 23:7 24:8 25:9 26:10 27:11 28:12 29:13 30:13 31:14 32:15 33:16 34:17 35:18 36:18 37:19 38:20 39:20 40:20 41:21 42:22 43:23 44:24 45:25 46:26 47:27 48:28 49:29 50:30 51:31 52:32 53:32 54:32 55:33 56:34 57:34 58:34 59:35 60:36 61:37 62:37 63:37 64:38 65:38 66:38 67:38 68:39 69:40 70:41 71:42 72:42 73:42 74:43 75:44 76:45 77:46 78:46 79:46 80:46 81:46 82:46 83:47 84:48 85:49 86:50 87:50 88:51 89:52 90:53 91:53 92:54 93:55 94:56 95:57 96:58 97:59 98:59 99:59 100:60 101:60 102:60 103:61 104:62 105:62 106:63 107:63 108:64 109:65 110:66 111:67 112:67 113:68 114:69 115:69 116:70 117:71 118:71 119:71 120:71 121:72 122:73 123:74 124:75 125:76 126:77 127:78 128:79 129:80 130:80 131:81 132:82 133:83 134:84 135:84 136:84 137:85 138:86 139:87 140:87 141:87 142:87 143:88 144:89 145:90 146:90 147:91 148:91 149:92 150:93 151:93 152:93 153:94 154:95 155:96 156:97 157:98 158:99 159:100 160:101 161:101 162:102 163:103 164:104 165:104 166:105 167:106 168:106 169:106 170:107 171:108 172:109 173:110 174:111 175:112 176:112 177:113 178:114 179:115 180:116 181:117 182:117 183:117 184:117 185:118 186:119 187:120 188:121 189:121 190:122 191:123 192:124 193:125 194:126 195:127 196:128 197:129 198:129 199:130 200:130 201:131 202:131 203:132 204:133 205:134 206:135 207:136 208:136 209:136 210:136 211:137 212:138 213:138 214:138 215:139 216:139 217:140 218:141 219:141 220:141 221:142 222:143 223:143 224:144 225:145 226:146 227:146 228:146 229:146 230:146 231:147 232:148 233:148 234:148 235:148 236:148 237:149 238:150 239:151 240:152 241:153 242:154 243:155 244:156 245:156 246:157 247:157 248:158 249:159 250:159 251:159 252:159 253:160 254:161 255:162 256:163 257:163 258:164 259:165 260:166 261:166 262:166 263:167 264:168 265:169 266:170 267:171 268:171 269:171 270:172 271:173 272:173 273:173 274:174 275:174 276:175 277:176 278:177 279:178 280:179 281:180 282:181 283:182 284:183 285:183 286:184 287:185 288:186 289:187 290:187 291:188 292:189 293:190 294:191 295:192 296:192 297:193 298:194 299:195 300:196 301:196 302:196 303:196 304:197 305:197 306:198 307:199 308:200 309:200 310:201 311:202 312:202 313:203 314:204 315:205 316:205 317:205 318:205 319:206 320:207 321:207 322:207 323:207 324:208 325:209 326:209 327:209 328:210 329:211 330:212 331:212 332:212 333:212 334:213 335:214 336:214 337:214 338:214 339:215 340:216 341:217 342:218 343:218 344:219 345:220 346:221 347:221 348:221 349:222 350:223 351:224 352:224 353:225 354:225\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 
123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True 201:True 202:True 203:True 204:True 205:True 206:True 207:True 208:True 209:True 210:True 211:True 212:True 213:True 214:True 215:True 216:True 217:True 218:True 219:True 220:True 221:True 222:True 223:True 224:True 225:True 226:True 227:True 228:True 229:True 230:True 231:True 232:True 233:True 234:True 235:True 236:True 237:True 238:True 239:True 240:True 241:True 242:True 243:True 244:True 245:True 246:True 247:True 248:True 249:True 250:True 251:True 252:True 253:True 254:True 255:True 256:True 257:True 258:True 259:True 260:True 261:True 262:True 263:True 264:True 265:True 266:True 267:True 268:True 269:True 270:True 271:True 272:True 273:True 274:True 275:True 276:True 277:True 278:True 279:True 280:True 281:True 282:True 283:True 284:True 285:True 286:True 287:True 288:True 289:True 290:True 291:True 292:True 293:True 294:True 295:True 296:True 297:True 298:True 299:True 300:True 301:True 302:True 303:True 304:True 305:True 306:True 307:True 308:True 309:True 310:True 311:True 312:True 313:True 314:True 315:True 316:True 317:True 318:True 319:True 320:True 321:True 322:True 323:True 324:True 325:True 326:True 327:True 328:True 329:True 330:True 331:True 332:True 333:True 334:True 335:True 336:True 337:True 338:True 339:True 340:True 341:True 342:True 343:True 344:True 345:True 346:True 347:True 348:True 349:True 350:True 351:True 352:True 353:True 354:True\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 3824 769 190 17 31499 18 2147 10792 118 255 82 0 18 2387 249 12765 54 24 383 413 19 2518 21 370 1585 31 10146 2926 1590 21 176 18 479 1646 9 18 2387 249 23 55 17447 28 58 12656 3424 21 1707 28 58 31747 11703 982 19 1939 2002 16821 23 20 18 4777 7439 556 49 6684 66 11285 91 59 63 30218 66 9 63 3135 18 17 12353 155 13 14182 1789 1243 20 18 23675 1406 883 63 3602 19 58 13424 1939 318 34 2387 249 19 2387 661 5587 49 2387 249 29183 19 48 400 6957 1243 9 18 4626 8358 20 2387 249 2087 19 59 63 1851 37 4816 33 18 29183 6923 19 30 24 312 17 2265 4631 20 10146 17 12786 93 19 21 168 2628 1896 17 150 20 2387 249 2087 30 17163 91 24 30133 21 17868 3824 769 25 28893 8709 9 18 2387 249 23 41 1699 207 28 58 1799 19 148 34 58 1779 17 986 249 17660 4797 21 2985 8991 19 21 28 58 1376 370 17877 21 19273 9 2387 249 6693 1114 2118 18 5975 20 17 23 9620 111 168 17 986 2371 17 8343 99 2147 10792 56 1335 36 12229 31 18 17 23 3068 9593 23 21 37 10280 10115 23 19 21 48 8553 31 4399 20 58 25950 19 17 31499 18 2147 10792 218 19 687 22 18 2387 249 18093 20 17 5618 729 38 18 1727 20 51 1203 23 25 241 4126 9 2387 249 2518 21 370 2204 1912 40 166 109 17 30707 13348 22 18 21018 213 1035 20 18 479 1646 19 131 58 8434 17 1238 1438 7778 17 150 2118 18 3824 769 20 932 6892 25 18 17 529 11226 19 22 17 23 12982 729 21 17 9760 202 25 312 17 5365 3766 19 22 17 5294 729 19 21 22 18 2372 23 20 1012 24 5994 
1346 21 18 64 1449 5852 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000007\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 7\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁branch ▁of ▁theoretical ▁computer ▁science ▁deals ▁with ▁broadly ▁classify ing ▁computational ▁problems ▁by ▁difficulty ▁and ▁class ▁of ▁relationship ? [SEP] ▁computational ▁complexity ▁theory ▁is ▁a ▁branch ▁of ▁the ▁theory ▁of ▁computation ▁in ▁theoretical ▁computer ▁science ▁that ▁focuses ▁on ▁classify ing ▁computational ▁problems ▁according ▁to ▁their ▁inherent ▁difficulty , ▁and ▁relating ▁those ▁classes ▁to ▁each ▁other . ▁a ▁computational ▁problem ▁is ▁understood ▁to ▁be ▁a ▁task ▁that ▁is ▁in ▁principle ▁a men able ▁to ▁being ▁solved ▁by ▁a ▁computer , ▁which ▁is ▁equivalent ▁to ▁stating ▁that ▁the ▁problem ▁may ▁be ▁solved ▁by ▁mechanical ▁application ▁of ▁mathematical ▁steps , ▁such ▁as ▁an ▁algorithm . 
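The `input_ids` / `input_mask` / `segment_ids` triples follow the usual fixed-length packing: the attention mask is 1 over real tokens and 0 over the zero padding, and the segment ids separate the query from the paragraph, which is why each logged array is a run of one value followed by a run of another. A minimal sketch of the padding step (`max_seq_length=384` is an assumption consistent with the array lengths here):

```python
def pad_feature(input_ids, segment_ids, max_seq_length=384, pad_id=0):
    """Build the attention mask and zero-pad one feature to max_seq_length."""
    input_mask = [1] * len(input_ids)          # 1 over real tokens
    padding = max_seq_length - len(input_ids)
    input_ids += [pad_id] * padding            # 0-padding after the last [SEP]
    input_mask += [0] * padding                # padding is masked out of attention
    segment_ids += [pad_id] * padding          # padding carries segment id 0
    return input_ids, input_mask, segment_ids
```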
[SEP]\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 22:0 23:1 24:2 25:3 26:4 27:5 28:6 29:7 30:8 31:9 32:10 33:11 34:12 35:13 36:14 37:15 38:16 39:17 40:18 41:18 42:19 43:20 44:21 45:22 46:23 47:24 48:25 49:25 50:26 51:27 52:28 53:29 54:30 55:31 56:32 57:32 58:33 59:34 60:35 61:36 62:37 63:38 64:39 65:40 66:41 67:42 68:43 69:44 70:45 71:46 72:46 73:46 74:47 75:48 76:49 77:50 78:51 79:52 80:52 81:53 82:54 83:55 84:56 85:57 86:58 87:59 88:60 89:61 90:62 91:63 92:64 93:65 94:66 95:67 96:68 97:69 98:69 99:70 100:71 101:72 102:73 103:73\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 3709 20 13109 920 1767 4108 33 16026 27871 56 23228 708 37 6157 21 1075 20 1498 82 0 23228 11906 2818 27 24 3709 20 18 2818 20 27686 25 13109 920 1767 29 7712 31 27871 56 23228 708 549 22 58 16507 6157 19 21 7376 186 2814 22 231 86 9 24 23228 662 27 4950 22 39 24 2578 29 27 25 4926 24 1126 386 22 163 12567 37 24 920 19 59 27 4682 22 8033 29 18 662 132 39 12567 37 7820 1479 20 12956 2094 19 148 34 48 13301 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - start_position: 22\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - end_position: 24\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - answer: ▁computational ▁complexity ▁theory\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000008\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 8\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁is ▁a ▁manual ▁application ▁of ▁mathematical ▁steps ? [SEP] ▁computational ▁complexity ▁theory ▁is ▁a ▁branch ▁of ▁the ▁theory ▁of ▁computation ▁in ▁theoretical ▁computer ▁science ▁that ▁focuses ▁on ▁classify ing ▁computational ▁problems ▁according ▁to ▁their ▁inherent ▁difficulty , ▁and ▁relating ▁those ▁classes ▁to ▁each ▁other . ▁a ▁computational ▁problem ▁is ▁understood ▁to ▁be ▁a ▁task ▁that ▁is ▁in ▁principle ▁a men able ▁to ▁being ▁solved ▁by ▁a ▁computer , ▁which ▁is ▁equivalent ▁to ▁stating ▁that ▁the ▁problem ▁may ▁be ▁solved ▁by ▁mechanical ▁application ▁of ▁mathematical ▁steps , ▁such ▁as ▁an ▁algorithm . 
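The `start_position` / `end_position` records index into the padded feature, not into the paragraph: the answer's span over original tokens is first mapped to sub-token positions, then shifted past the query. For example_index 7 above, `[CLS]`, the 20 query tokens and `[SEP]` occupy positions 0-21, so the paragraph begins at 22 and the answer "▁computational ▁complexity ▁theory" lands on 22-24. A condensed sketch of the offset arithmetic (assuming a single doc span, as in this run):

```python
def answer_feature_span(orig_start, orig_end, orig_to_tok_index,
                        num_doc_subtokens, query_len, doc_span_start=0):
    """Translate an answer span over original tokens into feature positions."""
    tok_start = orig_to_tok_index[orig_start]
    if orig_end < len(orig_to_tok_index) - 1:
        # last sub-token of the answer = one before the next word's first
        tok_end = orig_to_tok_index[orig_end + 1] - 1
    else:
        tok_end = num_doc_subtokens - 1
    doc_offset = query_len + 2  # [CLS] + query + [SEP] precede the paragraph
    return (tok_start - doc_span_start + doc_offset,
            tok_end - doc_span_start + doc_offset)
```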
[SEP]\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:13 25:14 26:15 27:16 28:17 29:18 30:18 31:19 32:20 33:21 34:22 35:23 36:24 37:25 38:25 39:26 40:27 41:28 42:29 43:30 44:31 45:32 46:32 47:33 48:34 49:35 50:36 51:37 52:38 53:39 54:40 55:41 56:42 57:43 58:44 59:45 60:46 61:46 62:46 63:47 64:48 65:49 66:50 67:51 68:52 69:52 70:53 71:54 72:55 73:56 74:57 75:58 76:59 77:60 78:61 79:62 80:63 81:64 82:65 83:66 84:67 85:68 86:69 87:69 88:70 89:71 90:72 91:73 92:73\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 27 24 6403 1479 20 12956 2094 82 0 23228 11906 2818 27 24 3709 20 18 2818 20 27686 25 13109 920 1767 29 7712 31 27871 56 23228 708 549 22 58 16507 6157 19 21 7376 186 2814 22 231 86 9 24 23228 662 27 4950 22 39 24 2578 29 27 25 4926 24 1126 386 22 163 12567 37 24 920 19 59 27 4682 22 8033 29 18 662 132 39 12567 37 7820 1479 20 12956 2094 19 148 34 48 13301 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000009\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 9\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁measure ▁of ▁a ▁computational ▁problem ▁broadly ▁defines ▁the ▁inherent ▁difficulty ▁of ▁the ▁solution ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
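The "impossible example" records mark SQuAD v2.0 questions whose paragraph contains no answer (`is_impossible` in the dataset JSON): instead of a real span, the training targets collapse both pointers onto position 0, the `[CLS]` token, so the model can learn to abstain. Schematically (a sketch of the convention, with `cls_index=0` matching the layout logged above):

```python
def training_span(start_position, end_position, is_impossible, cls_index=0):
    """Supervision targets for one feature: a real span, or [CLS] twice.

    cls_index=0 assumes [CLS] is the first token of every feature, as in
    the tokens records above.
    """
    if is_impossible:
        return cls_index, cls_index
    return start_position, end_position
```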
[SEP]\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 17:0 18:1 19:2 20:3 21:4 22:5 23:6 24:7 25:8 26:9 27:10 28:11 29:12 30:12 31:13 32:14 33:15 34:16 35:16 36:17 37:18 38:19 39:19 40:20 41:21 42:21 43:22 44:23 45:24 46:25 47:26 48:27 49:28 50:29 51:30 52:31 53:32 54:33 55:33 56:34 57:35 58:36 59:37 60:38 61:39 62:40 63:41 64:41 65:42 66:43 67:44 68:45 69:46 70:46 71:47 72:48 73:49 74:50 75:51 76:52 77:52 78:53 79:54 80:55 81:56 82:57 83:58 84:59 85:59 86:59 87:60 88:61 89:62 90:62 91:62 92:63 93:64 94:65 95:66 96:67 97:68 98:69 99:70 100:70 101:70 102:71 103:72 104:73 105:73 106:74 107:75 108:76 109:77 110:78 111:79 112:79 113:79 114:80 115:81 116:82 117:82 118:82 119:83 120:84 121:85 122:86 123:87 124:88 125:89 126:90 127:91 128:92 129:93 130:94 131:95 132:96 133:97 134:98 135:99 136:100 137:101 138:102 139:103 140:103\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2310 20 24 23228 662 16026 14668 18 16507 6157 20 18 1938 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - start_position: 24\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - end_position: 29\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - answer: ▁if ▁its ▁solution ▁requires ▁significant ▁resources\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000010\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 10\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁method ▁is ▁used ▁to ▁in tu itive ly ▁assess ▁or ▁quantify ▁the ▁amount ▁of ▁resources ▁required ▁to ▁solve ▁a ▁computational ▁problem ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
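Each `answer:` line is simply the slice of the feature's token list between `start_position` and `end_position`, logged so that a mangled offset shows up immediately as a nonsense string. The check is reproducible directly:

```python
def logged_answer(tokens, start_position, end_position):
    """Reproduce the `answer:` log line for one feature."""
    return " ".join(tokens[start_position:end_position + 1])

# For example_index 9 above (start 24, end 29) this yields:
# "▁if ▁its ▁solution ▁requires ▁significant ▁resources"
```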
[SEP]\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 25:0 26:1 27:2 28:3 29:4 30:5 31:6 32:7 33:8 34:9 35:10 36:11 37:12 38:12 39:13 40:14 41:15 42:16 43:16 44:17 45:18 46:19 47:19 48:20 49:21 50:21 51:22 52:23 53:24 54:25 55:26 56:27 57:28 58:29 59:30 60:31 61:32 62:33 63:33 64:34 65:35 66:36 67:37 68:38 69:39 70:40 71:41 72:41 73:42 74:43 75:44 76:45 77:46 78:46 79:47 80:48 81:49 82:50 83:51 84:52 85:52 86:53 87:54 88:55 89:56 90:57 91:58 92:59 93:59 94:59 95:60 96:61 97:62 98:62 99:62 100:63 101:64 102:65 103:66 104:67 105:68 106:69 107:70 108:70 109:70 110:71 111:72 112:73 113:73 114:74 115:75 116:76 117:77 118:78 119:79 120:79 121:79 122:80 123:81 124:82 125:82 126:82 127:83 128:84 129:85 130:86 131:87 132:88 133:89 134:90 135:91 136:92 137:93 138:94 139:95 140:96 141:97 142:98 143:99 144:100 145:101 146:102 147:103 148:103\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2175 27 179 22 25 2853 9736 111 7329 49 30299 18 1065 20 1485 978 22 4929 24 23228 662 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - start_position: 53\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - end_position: 56\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - answer: ▁mathematical ▁models ▁of ▁computation\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000011\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 11\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁are ▁two ▁basic ▁primary ▁resources ▁used ▁to ▁ gu age ▁complexity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:1 17:2 18:3 19:4 20:5 21:6 22:7 23:8 24:9 25:10 26:11 27:12 28:12 29:13 30:14 31:15 32:16 33:16 34:17 35:18 36:19 37:19 38:20 39:21 40:21 41:22 42:23 43:24 44:25 45:26 46:27 47:28 48:29 49:30 50:31 51:32 52:33 53:33 54:34 55:35 56:36 57:37 58:38 59:39 60:40 61:41 62:41 63:42 64:43 65:44 66:45 67:46 68:46 69:47 70:48 71:49 72:50 73:51 74:52 75:52 76:53 77:54 78:55 79:56 80:57 81:58 82:59 83:59 84:59 85:60 86:61 87:62 88:62 89:62 90:63 91:64 92:65 93:66 94:67 95:68 96:69 97:70 98:70 99:70 100:71 101:72 102:73 103:73 104:74 105:75 106:76 107:77 108:78 109:79 110:79 111:79 112:80 113:81 114:82 115:82 116:82 117:83 118:84 119:85 120:86 121:87 122:88 123:89 124:90 125:91 126:92 127:93 128:94 129:95 130:96 131:97 132:98 133:99 134:100 135:101 136:102 137:103 138:103\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 41 87 1949 1827 1485 179 22 17 3017 981 11906 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - start_position: 65\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - end_position: 67\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - answer: ▁time ▁and ▁storage\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000012\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 12\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁unit ▁is ▁measured ▁to ▁determine ▁circuit ▁simplicity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:12 25:13 26:14 27:15 28:16 29:16 30:17 31:18 32:19 33:19 34:20 35:21 36:21 37:22 38:23 39:24 40:25 41:26 42:27 43:28 44:29 45:30 46:31 47:32 48:33 49:33 50:34 51:35 52:36 53:37 54:38 55:39 56:40 57:41 58:41 59:42 60:43 61:44 62:45 63:46 64:46 65:47 66:48 67:49 68:50 69:51 70:52 71:52 72:53 73:54 74:55 75:56 76:57 77:58 78:59 79:59 80:59 81:60 82:61 83:62 84:62 85:62 86:63 87:64 88:65 89:66 90:67 91:68 92:69 93:70 94:70 95:70 96:71 97:72 98:73 99:73 100:74 101:75 102:76 103:77 104:78 105:79 106:79 107:79 108:80 109:81 110:82 111:82 112:82 113:83 114:84 115:85 116:86 117:87 118:88 119:89 120:90 121:91 122:92 123:93 124:94 125:95 126:96 127:97 128:98 129:99 130:100 131:101 132:102 133:103 134:103\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 1591 27 7375 22 2081 5034 18950 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000013\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 13\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁number ▁is ▁used ▁in ▁perpendicular ▁computing ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7 18:8 19:9 20:10 21:11 22:12 23:12 24:13 25:14 26:15 27:16 28:16 29:17 30:18 31:19 32:19 33:20 34:21 35:21 36:22 37:23 38:24 39:25 40:26 41:27 42:28 43:29 44:30 45:31 46:32 47:33 48:33 49:34 50:35 51:36 52:37 53:38 54:39 55:40 56:41 57:41 58:42 59:43 60:44 61:45 62:46 63:46 64:47 65:48 66:49 67:50 68:51 69:52 70:52 71:53 72:54 73:55 74:56 75:57 76:58 77:59 78:59 79:59 80:60 81:61 82:62 83:62 84:62 85:63 86:64 87:65 88:66 89:67 90:68 91:69 92:70 93:70 94:70 95:71 96:72 97:73 98:73 99:74 100:75 101:76 102:77 103:78 104:79 105:79 106:79 107:80 108:81 109:82 110:82 111:82 112:83 113:84 114:85 115:86 116:87 117:88 118:89 119:90 120:91 121:92 122:93 123:94 124:95 125:96 126:97 127:98 128:99 129:100 130:101 131:102 132:103 133:103\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 10:True 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 243 27 179 25 30525 9848 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Saving features into cached file cached_train_xlnet-base-cased_384\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - ***** Running training *****\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Num examples = 14\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Num Epochs = 3\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Instantaneous batch size per GPU = 8\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Gradient Accumulation steps = 1\n", + "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Total optimization steps = 6\n", "Epoch: 0%| | 0/3 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+      "<table border=\"1\" class=\"dataframe\">\n",
+      "  <thead>\n",
+      "    <tr style=\"text-align: right;\"><th></th><th>date</th><th>title</th><th>category</th><th>link</th><th>abstract</th><th>paragraphs</th></tr>\n",
+      "  </thead>\n",
+      "  <tbody>\n",
+      "    <tr><th>0</th><td>13.05.2019</td><td>The banking jobs : Assistant Vice President – ...</td><td>Careers</td><td>https://group.bnpparibas/en/news/banking-jobs-...</td><td>Within the Group’s Corporate and Institutional...</td><td>[I manage a team in charge of designing and im...</td></tr>\n",
+      "    <tr><th>1</th><td>13.05.2019</td><td>BNP Paribas at #VivaTech : discover the progra...</td><td>Innovation</td><td>https://group.bnpparibas/en/news/bnp-paribas-v...</td><td>From Thursday 16 to Saturday 18 May 2019, join...</td><td>[With François Hollande, Chairman of French fo...</td></tr>\n",
+      "    <tr><th>2</th><td>13.05.2019</td><td>\"The bank with an IT budget of more than EUR6 ...</td><td>Group</td><td>https://group.bnpparibas/en/news/the-bank-budg...</td><td>Interview with Jean-Laurent Bonnafé, Director ...</td><td>[We did the groundwork between 2012 and 2016, ...</td></tr>\n",
+      "    <tr><th>3</th><td>10.05.2019</td><td>BNP Paribas at #VivaTech : discover the progra...</td><td>Innovation</td><td>https://group.bnpparibas/en/news/bnp-paribas-v...</td><td>From Thursday 16 to Saturday 18 May 2019, join...</td><td>[As part of the ‘United Tech of Europe’ theme,...</td></tr>\n",
+      "    <tr><th>4</th><td>10.05.2019</td><td>When Artificial Intelligence participates in r...</td><td>Careers</td><td>https://group.bnpparibas/en/news/artificial-in...</td><td>As the competition to attract talent intensifi...</td><td>[Online recruitment is already the norm. Accor...</td></tr>\n",
+      "  </tbody>\n",
+      "</table>\n",
\n", + "" + ], "text/plain": [ - "{'HasAns_exact': 0.0,\n", - " 'HasAns_f1': 28.426053113553113,\n", - " 'HasAns_total': 8,\n", - " 'NoAns_exact': 0.0,\n", - " 'NoAns_f1': 0.0,\n", - " 'NoAns_total': 6,\n", - " 'best_exact': 42.857142857142854,\n", - " 'best_exact_thresh': 0.0,\n", - " 'best_f1': 42.857142857142854,\n", - " 'best_f1_thresh': 0.0,\n", - " 'exact': 0.0,\n", - " 'f1': 16.24345892203035,\n", - " 'total': 14}" + " date ... paragraphs\n", + "0 13.05.2019 ... [I manage a team in charge of designing and im...\n", + "1 13.05.2019 ... [With François Hollande, Chairman of French fo...\n", + "2 13.05.2019 ... [We did the groundwork between 2012 and 2016, ...\n", + "3 10.05.2019 ... [As part of the ‘United Tech of Europe’ theme,...\n", + "4 10.05.2019 ... [Online recruitment is already the norm. Accor...\n", + "\n", + "[5 rows x 6 columns]" ] }, "metadata": { "tags": [] }, - "execution_count": 17 + "execution_count": 14 } ] }, { "cell_type": "code", "metadata": { - "id": "Xm0BpYEs23AN", + "id": "rBljRU1gaJ8l", + "colab_type": "code", + "colab": {} + }, + "source": [ + "query = 'Since when does the Excellence Program of BNP Paribas exist?'" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "KQ9Be2rzZYQb", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 705 + "height": 170 }, - "outputId": "dc057da5-917b-48ca-c4aa-cbeb1e37c6e8" + "outputId": "e36872d2-597c-4064-b1e4-953c02e40a5c" }, "source": [ - "!ls -la" + "from cdqa.utils.converters import generate_squad_examples\n", + "from cdqa.retriever.tfidf_sklearn import TfidfRetriever\n", + "\n", + "metadata = df\n", + "metadata['content'] = metadata['paragraphs'].apply(lambda x: ' '.join(x))\n", + "\n", + "retriever = TfidfRetriever(verbose=True)\n", + "retriever.fit(metadata['content'])\n", + "closest_docs_indices = retriever.predict(query, metadata=metadata)" ], - "execution_count": 22, + "execution_count": 16, "outputs": [ { "output_type": "stream", "text": [ - "total 546076\n", - "drwxr-xr-x 9 root root 4096 Jul 16 17:37 .\n", - "drwxr-xr-x 1 root root 4096 Jul 16 17:18 ..\n", - "-rw-r--r-- 1 root root 2 Jul 16 17:33 added_tokens.json\n", - "-rw-r--r-- 1 root root 815 Jul 16 17:18 api.py\n", - "-rw-r--r-- 1 root root 22 Jul 16 17:18 apt.txt\n", - "-rw-r--r-- 1 root root 101108 Jul 16 17:36 cached_dev_xlnet-base-cased_384\n", - "-rw-r--r-- 1 root root 101136 Jul 16 17:27 cached_train_xlnet-base-cased_384\n", - "drwxr-xr-x 7 root root 4096 Jul 16 17:19 cdqa\n", - "drwxr-xr-x 2 root root 4096 Jul 16 17:19 cdqa.egg-info\n", - "-rw-r--r-- 1 root root 641 Jul 16 17:33 config.json\n", - "-rw-r--r-- 1 root root 4854279 Jul 16 17:19 dev-v1.1.json\n", - "-rw-r--r-- 1 root root 4370528 Jul 16 17:19 dev-v2.0.json\n", - "-rw-r--r-- 1 root root 8786 Jul 16 17:24 dev-v2.0-small.json\n", - "-rw-r--r-- 1 root root 1452 Jul 16 17:18 download.py\n", - "drwxr-xr-x 2 root root 4096 Jul 16 17:18 examples\n", - "drwxr-xr-x 8 root root 4096 Jul 16 17:18 .git\n", - "drwxr-xr-x 3 root root 4096 Jul 16 17:18 .github\n", - "-rw-r--r-- 1 root root 1375 Jul 16 17:18 .gitignore\n", - "-rw-r--r-- 1 root root 11356 Jul 16 17:18 LICENSE\n", - "-rw-r--r-- 1 root root 40 Jul 16 17:18 MANIFEST.in\n", - "-rw-r--r-- 1 root root 13572 Jul 16 17:37 nbest_predictions_.json\n", - "-rw-r--r-- 1 root root 759 Jul 16 17:37 null_odds_.json\n", - "-rw-r--r-- 1 root root 1212 Jul 16 17:37 predictions_.json\n", - "-rw-r--r-- 1 root root 476372095 Jul 16 17:33 pytorch_model.bin\n", - "-rw-r--r-- 1 root 
root 12855 Jul 16 17:18 README.md\n", - "-rw-r--r-- 1 root root 129 Jul 16 17:18 requirements.txt\n", - "drwxr-xr-x 4 root root 4096 Jul 16 17:27 runs\n", - "-rw-r--r-- 1 root root 727 Jul 16 17:18 setup.py\n", - "-rw-r--r-- 1 root root 202 Jul 16 17:33 special_tokens_map.json\n", - "-rw-r--r-- 1 root root 798011 Jul 16 17:33 spiece.model\n", - "drwxr-xr-x 2 root root 4096 Jul 16 17:18 tests\n", - "-rw-r--r-- 1 root root 1244 Jul 16 17:33 training_args.bin\n", - "-rw-r--r-- 1 root root 30288272 Jul 16 17:19 train-v1.1.json\n", - "-rw-r--r-- 1 root root 42123633 Jul 16 17:19 train-v2.0.json\n", - "-rw-r--r-- 1 root root 312 Jul 16 17:18 .travis.yml\n" + "+------+-------+-----------------------------------------------------+\n", + "| rank | index | title |\n", + "+------+-------+-----------------------------------------------------+\n", + "| 1 | 416 | BNP Paribas’ commitment to universities and schools |\n", + "| 2 | 146 | BNP Paribas Graduate Programs in France |\n", + "| 3 | 881 | Making the most of your VIE! |\n", + "+------+-------+-----------------------------------------------------+\n", + "Time: 0.00622 seconds\n" ], "name": "stdout" } @@ -955,16 +1231,452 @@ { "cell_type": "code", "metadata": { - "ExecuteTime": { - "end_time": "2019-06-25T14:21:26.472449Z", - "start_time": "2019-06-25T14:21:11.427052Z" + "id": "EIAlFnv_aLO_", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 }, - "id": "NIwNr1DdFf3X", + "outputId": "ef4ac49c-9d68-4818-81bc-de4c9ab41f62" + }, + "source": [ + "squad_examples = generate_squad_examples(question=query,\n", + " closest_docs_indices=closest_docs_indices,\n", + " metadata=metadata)" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "text": [ + "3it [00:00, 959.06it/s]\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WEJaWWo3cRib", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 935 + }, + "outputId": "5c02b603-c6ee-4fab-c6a5-a724ecd2750c" + }, + "source": [ + "squad_examples" + ], + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[{'paragraphs': [{'context': 'BNP Paribas has long maintained a strong relationship with the academic world and target schools in order to attract its future talent, whether it be universities, business schools or engineering schools. The Group serves as a committed partner of the academic world. It also plays a role in curriculum planning by updating coursework so that it matches actual business needs as closely as possible.',\n", + " 'qas': [{'answers': [],\n", + " 'id': '1d0fc3a5-8499-4f9e-a247-689246c9f21a',\n", + " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", + " {'context': 'To promote the bank’s businesses among students and recruit high-potential candidates or future talent, the BNP Paribas Group relies on its Campus Management team, which ensures that the bank’s initiatives carried out with certain schools in France match the needs of its businesses, departments and subsidiaries. In other words, the role of Campus Management is to develop quality relationships with students and promote the Group’s businesses. 
In the words of Jean-Dominique Criscuolo, Manager of Partnerships and Relations with Schools and Universities, “We strongly believe that our Academic Relations play a growing role in transforming our Group.”',\n", + " 'qas': [{'answers': [],\n", + " 'id': 'bbfc511f-f6f8-44ea-a670-c244e78674e2',\n", + " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", + " {'context': 'According to Jean-Dominique Criscuolo, \"BNP Paribas comprises some 300 businesses, some of which only emerged within the last two years. Including data scientists, agile coaches, and IT inspectors, the new professions created by the digital transformation broaden the palette of the Group’s traditional businesses. In a changing world, governed by rapidly evolving international financial regulations, many new opportunities are available within our Compliance teams. This is a dynamic profession which, due to regulatory, geopolitical and societal changes, plays an increasingly central role in the crossroads of strategy and the daily actions of the bank and its customers.”',\n", + " 'qas': [{'answers': [],\n", + " 'id': '53c2b252-0977-4b13-9671-a5f72fca2929',\n", + " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", + " {'context': 'This large-scale project will further expand in coming years, notably internationally. Gilles Deschanel, in charge of academic partnerships at BNP Paribas CIB, observes that “this partnership aims to become one of the leading research bodies in this immensely disruptive technology. It will also play a role in transforming the financial sector through publications and major events like VivaTech.”',\n", + " 'qas': [{'answers': [],\n", + " 'id': 'f5a14933-debb-4763-9963-bc28630ec803',\n", + " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", + " {'context': 'Since January 2016, BNP Paribas has offered an Excellence Program targeting new Master’s level graduates (BAC+5) who show high potential. The aid program lasts 18 months and comprises three assignments of six months each. It serves as a strong career accelerator that enables participants to access high-level management positions at a faster rate. The program allows participants to discover the BNP Paribas Group and its various entities in France and abroad, build an internal and external network by working on different assignments and receive personalized assistance from a mentor and coaching firm at every step along the way.',\n", + " 'qas': [{'answers': [],\n", + " 'id': 'd550492f-a459-4f9e-8c4f-f1c59a26eca2',\n", + " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]}],\n", + " 'title': 'BNP Paribas’ commitment to universities and schools'},\n", + " {'paragraphs': [{'context': 'Looking to kickstart your career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’ Graduate Programs. These challenging 18-month programs spent in different operational functions enable participants to expand their banking knowledge, skills and professional networks. Learn more about this promising initiative.',\n", + " 'qas': [{'answers': [],\n", + " 'id': '050766ff-11aa-4892-a449-4cee8ee764a6',\n", + " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", + " {'context': 'Convinced that recent and future graduates represent the future of the Group, BNP Paribas is counting on their talent to maintain its edge in the market. 
As it works to build the future of banking, the Group is now putting in place HR measures designed to attract future talent. The Graduate Programs are one example: these recruiting programs allow selected candidates to join several operational functions for 18 months, while benefiting from personalized HR support.',\n", + " 'qas': [{'answers': [],\n", + " 'id': '5a97fe50-3428-4c73-8c0c-4170e71f5fff',\n", + " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", + " {'context': 'Hired immediately through long-term contracts, participants in each pathway complete an immersive and personalized curriculum composed of three professional development assignments, each lasting six months. The rotation is co-constructed based on the needs of the bank’s businesses and the skills or interests of each participant. As full team members, participants quickly gain experience and specific skills by working directly with their peers and senior employees in the entity. As soon as they are hired, participants also become members of a “trainee class” to promote joint development, experience-sharing and to help build skills as a group. Recruiting programs are open to candidates of all nationalities, though they take place in France (with some exceptions).',\n", + " 'qas': [{'answers': [],\n", + " 'id': '99aab257-6f61-49d3-b68f-40bacfd87791',\n", + " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", + " {'context': 'The programs seek a wide range of candidates. While Master’s (Bac+5) graduates in math, finance, economics, science, business, engineering and computer science, showing strong analytical skills and mathematical abilities, can apply for the Excellence Program, digital and data-oriented candidates are invited to join the Digital Path. The Digital Path encourages a passionate mindset and a “digital explorer” mentality, rather than a specific degree. In this way, the Group aims to develop an internal mindset focused on digital innovation, notably by integrating the following candidates:',\n", + " 'qas': [{'answers': [],\n", + " 'id': 'aa5f8267-0014-4ae0-8b0f-8a41e8a5996f',\n", + " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", + " {'context': 'For all programs, initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have a specific end goal in mind for the program—based on their preferences and the opportunities available with each business, participants can co-construct their pathway with HR teams as they progress through the program.',\n", + " 'qas': [{'answers': [],\n", + " 'id': '37fa1b15-ffc2-4701-82ff-d4009b08b66e',\n", + " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]}],\n", + " 'title': 'BNP Paribas Graduate Programs in France'},\n", + " {'paragraphs': [{'context': 'BNP Paribas recruits new graduates to fulfill assignments lasting up to 16 months by joining the Group through one of its international subsidiaries. What types of positions are available through the VIE program? What destinations does it offer? What types of applicants do we look for? 
Let’s take a closer look at the international corporate volunteer program, which provides a chance to launch your international career in a position with real responsibilities.',\n", + " 'qas': [{'answers': [],\n", + " 'id': '929d2507-9243-4f47-b4e2-050ff9fdea5b',\n", + " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]}],\n", + " 'title': 'Making the most of your VIE!'}]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qRwGqhHjXPeb", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 935 + }, + "outputId": "7cbf967a-d9fb-413d-f78a-6b8a06c4ae3a" + }, + "source": [ + "reader = Reader(train_file='dev-v2.0-small.json',\n", + " predict_file=squad_examples,\n", + " model_type='xlnet',\n", + " model_name_or_path='xlnet-base-cased',\n", + " fp16=False,\n", + " output_dir='.')" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "text": [ + "07/17/2019 11:38:02 - WARNING - cdqa.reader.reader_sklearn - Process rank: -1, device: cpu, n_gpu: 1, distributed training: False, 16-bits training: False\n", + "07/17/2019 11:38:03 - INFO - pytorch_transformers.modeling_utils - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json from cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/17/2019 11:38:03 - INFO - pytorch_transformers.modeling_utils - Model config {\n", + " \"attn_type\": \"bi\",\n", + " \"bi_data\": false,\n", + " \"clamp_len\": -1,\n", + " \"d_head\": 64,\n", + " \"d_inner\": 3072,\n", + " \"d_model\": 768,\n", + " \"dropout\": 0.1,\n", + " \"end_n_top\": 5,\n", + " \"ff_activation\": \"gelu\",\n", + " \"finetuning_task\": null,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"mem_len\": null,\n", + " \"n_head\": 12,\n", + " \"n_layer\": 12,\n", + " \"n_token\": 32000,\n", + " \"num_labels\": 2,\n", + " \"output_attentions\": false,\n", + " \"output_hidden_states\": false,\n", + " \"reuse_len\": null,\n", + " \"same_length\": false,\n", + " \"start_n_top\": 5,\n", + " \"summary_activation\": \"tanh\",\n", + " \"summary_last_dropout\": 0.1,\n", + " \"summary_type\": \"last\",\n", + " \"summary_use_proj\": true,\n", + " \"torchscript\": false,\n", + " \"untie_r\": true\n", + "}\n", + "\n", + "07/17/2019 11:38:04 - INFO - pytorch_transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/17/2019 11:38:05 - INFO - pytorch_transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin from cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", + "07/17/2019 11:38:11 - INFO - pytorch_transformers.modeling_utils - Weights of XLNetForQuestionAnswering not initialized from pretrained model: ['start_logits.dense.weight', 'start_logits.dense.bias', 'end_logits.dense_0.weight', 'end_logits.dense_0.bias', 'end_logits.LayerNorm.weight', 
'end_logits.LayerNorm.bias', 'end_logits.dense_1.weight', 'end_logits.dense_1.bias', 'answer_class.dense_0.weight', 'answer_class.dense_0.bias', 'answer_class.dense_1.weight']\n", + "07/17/2019 11:38:11 - INFO - pytorch_transformers.modeling_utils - Weights from pretrained model not used in XLNetForQuestionAnswering: ['lm_loss.weight', 'lm_loss.bias']\n", + "07/17/2019 11:38:11 - INFO - cdqa.reader.reader_sklearn - Training/evaluation parameters Reader(adam_epsilon=1e-08, cache_dir='', config_name='', do_eval=None,\n", + " do_lower_case=True, do_train=None, doc_stride=128,\n", + " eval_all_checkpoints=True, evaluate_during_training=True, fp16=False,\n", + " fp16_opt_level='O1', gradient_accumulation_steps=1, learning_rate=5e-05,\n", + " local_rank=-1, logging_steps=50, max_answer_length=30, max_grad_norm=1.0,\n", + " max_query_length=64, max_seq_length=384, max_steps=-1,\n", + " model_name_or_path='xlnet-base-cased', model_type='xlnet',\n", + " n_best_size=20, no_cuda=True, null_score_diff_threshold=0.0,\n", + " num_train_epochs=3.0, output_dir='.', overwrite_cache=True,\n", + " overwrite_output_dir=True, per_gpu_eval_batch_size=8, ...)\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "85HVKxOJYHuN", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "cae90a62-cdea-4ca5-983f-35f4ff25d68b" + }, + "source": [ + "out_eval, final_prediction = reader.predict(X='')" + ], + "execution_count": 20, + "outputs": [ + { + "output_type": "stream", + "text": [ + "07/17/2019 11:38:11 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at [{'title': 'BNP Paribas’ commitment to universities and schools', 'paragraphs': [{'context': 'BNP Paribas has long maintained a strong relationship with the academic world and target schools in order to attract its future talent, whether it be universities, business schools or engineering schools. The Group serves as a committed partner of the academic world. It also plays a role in curriculum planning by updating coursework so that it matches actual business needs as closely as possible.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '1d0fc3a5-8499-4f9e-a247-689246c9f21a'}]}, {'context': 'To promote the bank’s businesses among students and recruit high-potential candidates or future talent, the BNP Paribas Group relies on its Campus Management team, which ensures that the bank’s initiatives carried out with certain schools in France match the needs of its businesses, departments and subsidiaries. In other words, the role of Campus Management is to develop quality relationships with students and promote the Group’s businesses. In the words of Jean-Dominique Criscuolo, Manager of Partnerships and Relations with Schools and Universities, “We strongly believe that our Academic Relations play a growing role in transforming our Group.”', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': 'bbfc511f-f6f8-44ea-a670-c244e78674e2'}]}, {'context': 'According to Jean-Dominique Criscuolo, \"BNP Paribas comprises some 300 businesses, some of which only emerged within the last two years. Including data scientists, agile coaches, and IT inspectors, the new professions created by the digital transformation broaden the palette of the Group’s traditional businesses. 
In a changing world, governed by rapidly evolving international financial regulations, many new opportunities are available within our Compliance teams. This is a dynamic profession which, due to regulatory, geopolitical and societal changes, plays an increasingly central role in the crossroads of strategy and the daily actions of the bank and its customers.”', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '53c2b252-0977-4b13-9671-a5f72fca2929'}]}, {'context': 'This large-scale project will further expand in coming years, notably internationally. Gilles Deschanel, in charge of academic partnerships at BNP Paribas CIB, observes that “this partnership aims to become one of the leading research bodies in this immensely disruptive technology. It will also play a role in transforming the financial sector through publications and major events like VivaTech.”', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': 'f5a14933-debb-4763-9963-bc28630ec803'}]}, {'context': 'Since January 2016, BNP Paribas has offered an Excellence Program targeting new Master’s level graduates (BAC+5) who show high potential. The aid program lasts 18 months and comprises three assignments of six months each. It serves as a strong career accelerator that enables participants to access high-level management positions at a faster rate. The program allows participants to discover the BNP Paribas Group and its various entities in France and abroad, build an internal and external network by working on different assignments and receive personalized assistance from a mentor and coaching firm at every step along the way.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': 'd550492f-a459-4f9e-8c4f-f1c59a26eca2'}]}]}, {'title': 'BNP Paribas Graduate Programs in France', 'paragraphs': [{'context': 'Looking to kickstart your career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’ Graduate Programs. These challenging 18-month programs spent in different operational functions enable participants to expand their banking knowledge, skills and professional networks. Learn more about this promising initiative.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '050766ff-11aa-4892-a449-4cee8ee764a6'}]}, {'context': 'Convinced that recent and future graduates represent the future of the Group, BNP Paribas is counting on their talent to maintain its edge in the market. As it works to build the future of banking, the Group is now putting in place HR measures designed to attract future talent. The Graduate Programs are one example: these recruiting programs allow selected candidates to join several operational functions for 18 months, while benefiting from personalized HR support.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '5a97fe50-3428-4c73-8c0c-4170e71f5fff'}]}, {'context': 'Hired immediately through long-term contracts, participants in each pathway complete an immersive and personalized curriculum composed of three professional development assignments, each lasting six months. The rotation is co-constructed based on the needs of the bank’s businesses and the skills or interests of each participant. 
As full team members, participants quickly gain experience and specific skills by working directly with their peers and senior employees in the entity. As soon as they are hired, participants also become members of a “trainee class” to promote joint development, experience-sharing and to help build skills as a group. Recruiting programs are open to candidates of all nationalities, though they take place in France (with some exceptions).', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '99aab257-6f61-49d3-b68f-40bacfd87791'}]}, {'context': 'The programs seek a wide range of candidates. While Master’s (Bac+5) graduates in math, finance, economics, science, business, engineering and computer science, showing strong analytical skills and mathematical abilities, can apply for the Excellence Program, digital and data-oriented candidates are invited to join the Digital Path. The Digital Path encourages a passionate mindset and a “digital explorer” mentality, rather than a specific degree. In this way, the Group aims to develop an internal mindset focused on digital innovation, notably by integrating the following candidates:', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': 'aa5f8267-0014-4ae0-8b0f-8a41e8a5996f'}]}, {'context': 'For all programs, initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have a specific end goal in mind for the program—based on their preferences and the opportunities available with each business, participants can co-construct their pathway with HR teams as they progress through the program.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '37fa1b15-ffc2-4701-82ff-d4009b08b66e'}]}]}, {'title': 'Making the most of your VIE!', 'paragraphs': [{'context': 'BNP Paribas recruits new graduates to fulfill assignments lasting up to 16 months by joining the Group through one of its international subsidiaries. What types of positions are available through the VIE program? What destinations does it offer? What types of applicants do we look for? Let’s take a closer look at the international corporate volunteer program, which provides a chance to launch your international career in a position with real responsibilities.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '929d2507-9243-4f47-b4e2-050ff9fdea5b'}]}]}]\n", + "07/17/2019 11:38:11 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/17/2019 11:38:11 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n", + "07/17/2019 11:38:11 - INFO - cdqa.reader.utils_squad - example_index: 0\n", + "07/17/2019 11:38:11 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/17/2019 11:38:11 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁ b n p ▁ pari bas ▁has ▁long ▁maintained ▁a ▁strong ▁relationship ▁with ▁the ▁academic ▁world ▁and ▁target ▁schools ▁in ▁order ▁to ▁attract ▁its ▁future ▁talent , ▁whether ▁it ▁be ▁universities , ▁business ▁schools ▁or ▁engineering ▁schools . ▁the ▁group ▁serves ▁as ▁a ▁committed ▁partner ▁of ▁the ▁academic ▁world . 
▁it ▁also ▁plays ▁a ▁role ▁in ▁curriculum ▁planning ▁by ▁updating ▁course work ▁so ▁that ▁it ▁matches ▁actual ▁business ▁needs ▁as ▁closely ▁as ▁possible . [SEP]\n", + "07/17/2019 11:38:11 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:0 20:0 21:0 22:1 23:1 24:1 25:2 26:3 27:4 28:5 29:6 30:7 31:8 32:9 33:10 34:11 35:12 36:13 37:14 38:15 39:16 40:17 41:18 42:19 43:20 44:21 45:21 46:22 47:23 48:24 49:25 50:25 51:26 52:27 53:28 54:29 55:30 56:30 57:31 58:32 59:33 60:34 61:35 62:36 63:37 64:38 65:39 66:40 67:41 68:41 69:42 70:43 71:44 72:45 73:46 74:47 75:48 76:49 77:50 78:51 79:52 80:52 81:53 82:54 83:55 84:56 85:57 86:58 87:59 88:60 89:61 90:62 91:63 92:63\n", + "07/17/2019 11:38:11 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True\n", + "07/17/2019 11:38:11 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 17 508 180 450 17 21605 7522 51 206 4109 24 737 1498 33 18 2550 185 21 1983 1326 25 374 22 4964 81 623 4738 19 548 36 39 5536 19 264 1326 49 3814 1326 9 18 256 3697 34 24 2362 2229 20 18 2550 185 9 36 77 2254 24 682 25 8400 1777 37 19520 477 3552 102 29 36 2466 2746 264 794 34 3126 34 498 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:38:11 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/17/2019 11:38:11 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+ "[… the ten remaining *** Example *** dumps (unique_id 1000000001 through 1000000010) are elided: each logs the same six fields (tokens, token_to_orig_map, token_is_max_context, input_ids, input_mask, segment_ids) for the same question paired with the other paragraphs of the article (campus partnerships, the Excellence Program offered since January 2016, the 18-month graduate programs, candidate profiles, and the VIE program); every paragraph fits in a single 384-token doc span, so token_is_max_context is True throughout. …]\n",
+ "07/17/2019 11:38:11 - INFO - cdqa.reader.reader_sklearn - Saving features into cached file cached_dev_xlnet-base-cased_384\n",
+ "07/17/2019 11:38:11 - INFO - cdqa.reader.reader_sklearn - ***** Running evaluation *****\n",
+ "07/17/2019 11:38:11 - INFO - cdqa.reader.reader_sklearn - Num examples = 11\n",
+ "07/17/2019 11:38:11 - INFO - cdqa.reader.reader_sklearn - Batch size = 8\n",
+ "Evaluating: 100%|██████████| 2/2 [00:25<00:00, 15.39s/it]\n",
+ "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Writing predictions to: ./predictions_.json\n",
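The arithmetic above checks out: 11 examples at batch size 8 need ceil(11/8) = 2 evaluation batches, matching the 2/2 progress bar. The "Unable to find text" warnings that follow are the answer-alignment step degrading gracefully rather than failing: when write_predictions maps a predicted token span back onto the original context, get_final_text in utils_squad relies on an exact substring match, and each pair below differs in casing (the feature tokens earlier in this log are all lower-cased even though the checkpoint is xlnet-base-cased, the likeliest source of the mismatch) or, for the pairs that look identical, most plausibly in punctuation spacing introduced by re-tokenization. When the match fails, the warning is logged and the untrimmed original text is returned as the prediction. A simplified sketch of the failure mode (align_span is an illustrative name; the real get_final_text additionally maps character offsets through a BasicTokenizer pass):

def align_span(pred_text, orig_text, do_lower_case):
    # Exact substring search: any casing or punctuation mismatch misses.
    haystack = orig_text.lower() if do_lower_case else orig_text
    start = haystack.find(pred_text)
    if start == -1:
        # Mirrors the warnings below.
        print("Unable to find text: '%s' in '%s'" % (pred_text, orig_text))
        return orig_text            # fall back to the untrimmed original
    return orig_text[start:start + len(pred_text)]

For example, the lower-cased prediction 'dominique criscuolo, "bnp' can never exactly match the cased context 'Jean-Dominique Criscuolo, "BNP Paribas', so the unaligned context is emitted in place of a trimmed answer.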
The Group serves as a committed partner of the academic'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'talent, whether' in 'talent, whether'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'talent, whether it be universities, business schools' in 'talent, whether it be universities, business schools'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'talent, whether it be universities, business schools' in 'talent, whether it be universities, business schools'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'future talent, the bnp paribas group relies on its campus management team, which ensures that the' in 'future talent, the BNP Paribas Group relies on its Campus Management team, which ensures that the'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'future talent, the' in 'future talent, the BNP'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'candidates or future talent, the bnp paribas group relies on its campus management team, which ensures that the' in 'candidates or future talent, the BNP Paribas Group relies on its Campus Management team, which ensures that the'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'paribas group relies on its campus management team, which ensures that the bank’s initiatives carried out with certain schools in france' in 'Paribas Group relies on its Campus Management team, which ensures that the bank’s initiatives carried out with certain schools in France'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'dominique criscuolo, \"bnp' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'dominique criscuolo, \"bnp' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'dominique criscuolo, \"bnp' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'dominique criscuolo, \"bnp' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'dominique criscuolo, \"bnp' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'inique criscuolo, \"bnp pari' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'gilles deschanel, in charge of academic partnerships at bnp' in 'Gilles Deschanel, in charge of academic partnerships at BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'gilles deschanel, in charge of academic partnerships at bnp' in 'Gilles Deschanel, in charge of academic partnerships at BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'gilles deschanel, in charge of academic partnerships at bnp' in 'Gilles Deschanel, in charge of academic partnerships at BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'gilles deschanel, in charge of academic partnerships at bnp' in 'Gilles Deschanel, in charge of academic partnerships at BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to 
find text: ', notably internationally. gilles deschanel, in charge of academic partnerships at bnp paribas' in 'years, notably internationally. Gilles Deschanel, in charge of academic partnerships at BNP Paribas CIB,'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: ', notably internationally. gilles deschanel, in charge of academic partnerships at bnp paribas' in 'years, notably internationally. Gilles Deschanel, in charge of academic partnerships at BNP Paribas CIB,'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a faster rate. the program allows participants to discover the bnp pari' in 'a faster rate. The program allows participants to discover the BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a faster rate. the program allows participants to discover the bnp' in 'a faster rate. The program allows participants to discover the BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a faster rate. the program allows participants to discover the bnp' in 'a faster rate. The program allows participants to discover the BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a faster rate. the program allows participants to discover the bnp pari' in 'a faster rate. The program allows participants to discover the BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a faster rate. the program allows participants to discover the bnp paribas' in 'a faster rate. The program allows participants to discover the BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'at a faster rate. the program allows participants to discover the b' in 'at a faster rate. The program allows participants to discover the BNP'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'at a faster rate. the program allows participants to discover the b' in 'at a faster rate. The program allows participants to discover the BNP'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'at a faster rate. the program allows participants to discover the bnp paribas' in 'at a faster rate. The program allows participants to discover the BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'january 2016, bnp paribas' in 'January 2016, BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'january 2016, bnp paribas' in 'January 2016, BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'january 2016, b' in 'January 2016, BNP'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'january 2016, bnp pari' in 'January 2016, BNP Paribas'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'career with several months of intensive immersion at a major banking group? that is the opportunity offered to recent graduates by bnp' in 'career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'career with several months of intensive immersion at a major banking group? 
that is the opportunity offered to recent graduates by bnp' in 'career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'career with several months of intensive immersion at a major banking group? that is the opportunity offered to recent graduates by bnp' in 'career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'career with several months of intensive immersion at a major banking group? that is the opportunity offered to recent graduates by bnp paribas’' in 'career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'career with several months of intensive immersion at a major banking group? that is the opportunity offered to recent graduates by bnp paribas’' in 'career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'that is the opportunity offered to recent graduates by bnp paribas’' in 'That is the opportunity offered to recent graduates by BNP Paribas’'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a major banking group? that is the opportunity offered to recent graduates by bnp paribas’' in 'a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a major banking group? that' in 'a major banking group? That'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'that is the opportunity offered to recent graduates by bnp paribas’' in 'That is the opportunity offered to recent graduates by BNP Paribas’'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a major banking group?' in 'a major banking group?'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'their talent to maintain its edge in the market. as it works to build the future of' in 'their talent to maintain its edge in the market. As it works to build the future of'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'the entity. as soon as they are hired, participants also become members of a “train' in 'the entity. As soon as they are hired, participants also become members of a “trainee'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'science, business' in 'science, business,'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'excellence program, digital and data-oriented candidates are invited to join the digital path. the digital path encourages a passionate mindset and a “digit' in 'Excellence Program, digital and data-oriented candidates are invited to join the Digital Path. 
The Digital Path encourages a passionate mindset and a “digital'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'excellence program, digital and data-oriented candidates are invited to join the digital path. the digital path encourages a passionate mindset and a “' in 'Excellence Program, digital and data-oriented candidates are invited to join the Digital Path. The Digital Path encourages a passionate mindset and a “digital'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: ', determination and curiosity are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have' in 'initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: ', determination and curiosity are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have' in 'initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'and curiosity are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have' in 'and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'and curiosity are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have' in 'and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'participants should also be committed team players eager to tackle collaborative work. candidates need not have a specific end goal in mind for the' in 'Participants should also be committed team players eager to tackle collaborative work. Candidates need not have a specific end goal in mind for the'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'participants should also be committed team players eager to tackle collaborative work. candidates need not' in 'Participants should also be committed team players eager to tackle collaborative work. Candidates need not'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: ', initiative, determination and curiosity are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not' in 'programs, initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: ', initiative, determination and curiosity are essential qualities. participants should' in 'programs, initiative, determination and curiosity are essential qualities. 
Participants should'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: ', initiative, determination and curiosity are essential qualities. participants should' in 'programs, initiative, determination and curiosity are essential qualities. Participants should'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates' in 'are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have' in 'are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have a specific end goal in mind for the program' in 'are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have a specific end goal in mind for the program—based'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'let’s take a' in 'Let’s take a'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'let’s take a' in 'Let’s take a'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'let’s take a' in 'Let’s take a'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'let’s take a' in 'Let’s take a'\n", + "07/17/2019 11:38:37 - INFO - cdqa.reader.utils_squad - Unable to find text: 'up to 16 months by joining the group through one of its international subsidiaries. what' in 'up to 16 months by joining the Group through one of its international subsidiaries. 
What'\n" + ], + "name": "stderr" + }, + { + "output_type": "error", + "ename": "TypeError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mout_eval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal_prediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m''\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/content/cdQA/cdqa/reader/reader_sklearn.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 581\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 582\u001b[0;31m \u001b[0mout_eval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal_prediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprefix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 583\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 584\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mout_eval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal_prediction\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/content/cdQA/cdqa/reader/reader_sklearn.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(args, model, tokenizer, prefix)\u001b[0m\n\u001b[1;32m 364\u001b[0m \u001b[0moutput_nbest_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_null_log_odds_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_file\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart_n_top\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend_n_top\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 366\u001b[0;31m args.version_2_with_negative, tokenizer, args.verbose_logging)\n\u001b[0m\u001b[1;32m 367\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 368\u001b[0m write_predictions(examples, features, all_results, args.n_best_size,\n", + "\u001b[0;32m/content/cdQA/cdqa/reader/utils_squad.py\u001b[0m in \u001b[0;36mwrite_predictions_extended\u001b[0;34m(all_examples, all_features, all_results, n_best_size, max_answer_length, output_prediction_file, output_nbest_file, output_null_log_odds_file, orig_data_file, start_n_top, end_n_top, version_2_with_negative, tokenizer, verbose_logging)\u001b[0m\n\u001b[1;32m 892\u001b[0m 
\u001b[0mwriter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscores_diff_json\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"\\n\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 893\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 894\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0morig_data_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 895\u001b[0m \u001b[0morig_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"data\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 896\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: expected str, bytes or os.PathLike object, not list" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "J_72WSnDlAxn", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!ls -la" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "irjokX-mQvmY", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# print('query: {}'.format(query))\n", + "print('answer: {}'.format(final_prediction[0]))\n", + "print('title: {}'.format(final_prediction[1]))\n", + "print('paragraph: {}'.format(final_prediction[2]))" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "0GObRQ1rJs-K", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!ls -la" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ikxxSgPPLP9C", "colab_type": "code", "colab": {} }, "source": [ - "# prediction = reader.predict(X='Since when does the Excellence Program of BNP Paribas exist?')" + "!cat predictions_.json" ], "execution_count": 0, "outputs": [] From dfe2669efb20e0877bc13e0b5ea0220e1819bdb1 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Wed, 17 Jul 2019 17:41:57 +0200 Subject: [PATCH 28/43] sync HF --- cdqa/reader/hf_original_examples/run_squad.py | 11 ++++++----- cdqa/reader/reader_sklearn.py | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/cdqa/reader/hf_original_examples/run_squad.py b/cdqa/reader/hf_original_examples/run_squad.py index e920ebe3..d72d67b8 100644 --- a/cdqa/reader/hf_original_examples/run_squad.py +++ b/cdqa/reader/hf_original_examples/run_squad.py @@ -122,9 +122,9 @@ def train(args, train_dataset, model, tokenizer): model.train() batch = tuple(t.to(args.device) for t in batch) inputs = {'input_ids': batch[0], - 'token_type_ids': None if args.model_type == 'xlm' else batch[1], # XLM don't use segment_ids - 'attention_mask': batch[2], - 'start_positions': batch[3], + 'attention_mask': batch[1], + 'token_type_ids': None if args.model_type == 'xlm' else batch[2], + 'start_positions': batch[3], 
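+                      # Editorial sketch, not part of the original commit: the
+                      # new indices assume the cached TensorDataset stores the
+                      # feature columns in the order logged above
+                      # (input_ids, input_mask, segment_ids, start_positions,
+                      # end_positions, ...), so the attention mask comes from
+                      # batch[1] and the segment ids from batch[2].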
'end_positions':   batch[4]}
         if args.model_type in ['xlnet', 'xlm']:
             inputs.update({'cls_index': batch[5],
@@ -206,8 +206,9 @@ def evaluate(args, model, tokenizer, prefix=""):
             batch = tuple(t.to(args.device) for t in batch)
             with torch.no_grad():
                 inputs = {'input_ids': batch[0],
-                          'token_type_ids': None if args.model_type == 'xlm' else batch[1], # XLM don't use segment_ids
-                          'attention_mask': batch[2]}
+                          'attention_mask': batch[1],
+                          'token_type_ids': None if args.model_type == 'xlm' else batch[2] # XLM doesn't use segment_ids
+                          }
                 example_indices = batch[3]
                 if args.model_type in ['xlnet', 'xlm']:
                     inputs.update({'cls_index': batch[4],
diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py
index 25a05652..b643d779 100644
--- a/cdqa/reader/reader_sklearn.py
+++ b/cdqa/reader/reader_sklearn.py
@@ -124,9 +124,9 @@ def train(args, train_dataset, model, tokenizer):
             model.train()
             batch = tuple(t.to(args.device) for t in batch)
             inputs = {'input_ids': batch[0],
-                      'token_type_ids': None if args.model_type == 'xlm' else batch[1], # XLM don't use segment_ids
-                      'attention_mask': batch[2],
-                      'start_positions': batch[3],
+                      'attention_mask': batch[1],
+                      'token_type_ids': None if args.model_type == 'xlm' else batch[2],
+                      'start_positions': batch[3],
                       'end_positions':   batch[4]}
             if args.model_type in ['xlnet', 'xlm']:
                 inputs.update({'cls_index': batch[5],
@@ -208,8 +208,9 @@ def evaluate(args, model, tokenizer, prefix=""):
             batch = tuple(t.to(args.device) for t in batch)
             with torch.no_grad():
                 inputs = {'input_ids': batch[0],
-                          'token_type_ids': None if args.model_type == 'xlm' else batch[1], # XLM don't use segment_ids
-                          'attention_mask': batch[2]}
+                          'attention_mask': batch[1],
+                          'token_type_ids': None if args.model_type == 'xlm' else batch[2] # XLM doesn't use segment_ids
+                          }
                 example_indices = batch[3]
                 if args.model_type in ['xlnet', 'xlm']:
                     inputs.update({'cls_index': batch[4],

From f6c73bb4a2b74c6afbaa530c4ca4216eda79f002 Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Wed, 17 Jul 2019 17:55:43 +0200
Subject: [PATCH 29/43] debug write_predictions_extended()

---
 cdqa/reader/utils_squad.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cdqa/reader/utils_squad.py b/cdqa/reader/utils_squad.py
index 0b71044c..c0a4e592 100644
--- a/cdqa/reader/utils_squad.py
+++ b/cdqa/reader/utils_squad.py
@@ -903,7 +903,7 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s
     exact_raw, f1_raw = get_raw_scores(orig_data, all_predictions)

     out_eval = {}
-    find_all_best_thresh_v2(out_eval, all_predictions, exact_raw, f1_raw, scores_diff_json, qid_to_has_ans)
+    # find_all_best_thresh_v2(out_eval, all_predictions, exact_raw, f1_raw, scores_diff_json, qid_to_has_ans)

     return out_eval, final_prediction


From 7e27e7a2784a2986cf52d347864f3543d508e972 Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Thu, 18 Jul 2019 11:04:22 +0200
Subject: [PATCH 30/43] update last notebook

---
 examples/tutorial-train-xlnet-squad.ipynb | 1510 +++++++++++----------
 1 file changed, 821 insertions(+), 689 deletions(-)

diff --git a/examples/tutorial-train-xlnet-squad.ipynb b/examples/tutorial-train-xlnet-squad.ipynb
index 50f1056d..c2b0142a 100644
--- a/examples/tutorial-train-xlnet-squad.ipynb
+++ b/examples/tutorial-train-xlnet-squad.ipynb
@@ -31,10 +31,10 @@
       "metadata": {
         "id": "zNtCqwveFjcK",
         "colab_type": "code",
-        "outputId": "33631378-b050-40df-bcf0-c5601c003091",
+        "outputId": 
"51af972d-83a0-4187-c207-ada3bef5bebd", "colab": { "base_uri": "https://localhost:8080/", - "height": 151 + "height": 153 } }, "source": [ @@ -46,12 +46,12 @@ "output_type": "stream", "text": [ "Cloning into 'cdQA'...\n", - "remote: Enumerating objects: 131, done.\u001b[K\n", - "remote: Counting objects: 100% (131/131), done.\u001b[K\n", - "remote: Compressing objects: 100% (92/92), done.\u001b[K\n", - "remote: Total 927 (delta 85), reused 76 (delta 39), pack-reused 796\u001b[K\n", - "Receiving objects: 100% (927/927), 319.29 KiB | 457.00 KiB/s, done.\n", - "Resolving deltas: 100% (552/552), done.\n" + "remote: Enumerating objects: 152, done.\u001b[K\n", + "remote: Counting objects: 100% (152/152), done.\u001b[K\n", + "remote: Compressing objects: 100% (103/103), done.\u001b[K\n", + "remote: Total 948 (delta 99), reused 96 (delta 49), pack-reused 796\u001b[K\n", + "Receiving objects: 100% (948/948), 351.74 KiB | 733.00 KiB/s, done.\n", + "Resolving deltas: 100% (566/566), done.\n" ], "name": "stdout" } @@ -77,10 +77,10 @@ "metadata": { "id": "5jBtSKczGF38", "colab_type": "code", - "outputId": "cd10460a-d2d9-4a8c-d2f9-85dc96012683", + "outputId": "925c98ca-29f2-405d-a03e-cd45c3c77659", "colab": { "base_uri": "https://localhost:8080/", - "height": 55 + "height": 56 } }, "source": [ @@ -103,7 +103,7 @@ "metadata": { "id": "DHl2HUX1GRd6", "colab_type": "code", - "outputId": "1b8fe994-1a12-481c-b289-e0c3b605d4cf", + "outputId": "e3c0e779-b07f-47d4-e044-fc4115d3e2d9", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 @@ -126,47 +126,47 @@ "Requirement already satisfied: prettytable in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.7.2)\n", "Collecting pytorch_pretrained_bert (from cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d7/e0/c08d5553b89973d9a240605b9c12404bcf8227590de62bae27acbcfe076b/pytorch_pretrained_bert-0.6.2-py3-none-any.whl (123kB)\n", - "\u001b[K |████████████████████████████████| 133kB 52.2MB/s \n", + "\u001b[K |████████████████████████████████| 133kB 53.7MB/s \n", "\u001b[?25hCollecting pytorch-transformers (from cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/40/b5/2d78e74001af0152ee61d5ad4e290aec9a1e43925b21df2dc74ec100f1ab/pytorch_transformers-1.0.0-py3-none-any.whl (137kB)\n", - "\u001b[K |████████████████████████████████| 143kB 61.0MB/s \n", + "\u001b[K |████████████████████████████████| 143kB 63.5MB/s \n", "\u001b[?25hCollecting tensorboardX (from cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/c3/12/dcaf67e1312475b26db9e45e7bb6f32b540671a9ee120b3a72d9e09bc517/tensorboardX-1.8-py2.py3-none-any.whl (216kB)\n", - "\u001b[K |████████████████████████████████| 225kB 63.6MB/s \n", + "\u001b[K |████████████████████████████████| 225kB 59.3MB/s \n", "\u001b[?25hRequirement already satisfied: scikit_learn in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.21.2)\n", "Collecting tika (from cdqa==1.0.3)\n", " Downloading https://files.pythonhosted.org/packages/10/75/b566e446ffcf292f10c8d84c15a3d91615fe3d7ca8072a17c949d4e84b66/tika-1.19.tar.gz\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (4.28.1)\n", "Collecting wget (from cdqa==1.0.3)\n", " Downloading https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip\n", - "Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from 
Flask->cdqa==1.0.3) (2.10.1)\n", - "Requirement already satisfied: click>=5.1 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (7.0)\n", "Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (1.1.0)\n", "Requirement already satisfied: Werkzeug>=0.15 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (0.15.4)\n", + "Requirement already satisfied: click>=5.1 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (7.0)\n", + "Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (2.10.1)\n", "Requirement already satisfied: Six in /usr/local/lib/python3.6/dist-packages (from flask_cors->cdqa==1.0.3) (1.12.0)\n", "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (1.16.4)\n", - "Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (2018.9)\n", "Requirement already satisfied: python-dateutil>=2.5.0 in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (2.5.3)\n", - "Requirement already satisfied: torch>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.1.0)\n", + "Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (2018.9)\n", "Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.9.185)\n", + "Requirement already satisfied: torch>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.1.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (2.21.0)\n", "Collecting regex (from pytorch_pretrained_bert->cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/6f/4e/1b178c38c9a1a184288f72065a65ca01f3154df43c6ad898624149b8b4e0/regex-2019.06.08.tar.gz (651kB)\n", - "\u001b[K |████████████████████████████████| 655kB 49.7MB/s \n", + "\u001b[K |████████████████████████████████| 655kB 38.0MB/s \n", "\u001b[?25hCollecting sentencepiece (from pytorch-transformers->cdqa==1.0.3)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/00/95/7f357995d5eb1131aa2092096dca14a6fc1b1d2860bd99c22a612e1d1019/sentencepiece-0.1.82-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\n", - "\u001b[K |████████████████████████████████| 1.0MB 51.9MB/s \n", + "\u001b[K |████████████████████████████████| 1.0MB 46.9MB/s \n", "\u001b[?25hRequirement already satisfied: protobuf>=3.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorboardX->cdqa==1.0.3) (3.7.1)\n", "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.6/dist-packages (from scikit_learn->cdqa==1.0.3) (1.3.0)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from tika->cdqa==1.0.3) (41.0.1)\n", "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->Flask->cdqa==1.0.3) (1.1.1)\n", "Requirement already satisfied: s3transfer<0.3.0,>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.2.1)\n", - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.9.4)\n", "Requirement already 
satisfied: botocore<1.13.0,>=1.12.185 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (1.12.185)\n", - "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (1.24.3)\n", + "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.9.4)\n", "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2.8)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2019.6.16)\n", "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (3.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2019.6.16)\n", + "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (1.24.3)\n", "Requirement already satisfied: docutils>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.13.0,>=1.12.185->boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.14)\n", "Building wheels for collected packages: tika, wget, regex\n", " Building wheel for tika (setup.py) ... \u001b[?25l\u001b[?25hdone\n", @@ -212,10 +212,10 @@ "metadata": { "id": "50r4anYBITRO", "colab_type": "code", - "outputId": "4231c98c-4d17-49e0-cd60-cd18af562124", + "outputId": "5560fd38-1679-464e-87a0-c4c71c6b1828", "colab": { "base_uri": "https://localhost:8080/", - "height": 457 + "height": 462 } }, "source": [ @@ -227,28 +227,28 @@ "output_type": "stream", "text": [ "total 79828\n", - "drwxr-xr-x 8 root root 4096 Jul 17 11:30 .\n", - "drwxr-xr-x 1 root root 4096 Jul 17 11:30 ..\n", - "-rw-r--r-- 1 root root 815 Jul 17 11:30 api.py\n", - "-rw-r--r-- 1 root root 22 Jul 17 11:30 apt.txt\n", - "drwxr-xr-x 6 root root 4096 Jul 17 11:30 cdqa\n", - "drwxr-xr-x 2 root root 4096 Jul 17 11:30 cdqa.egg-info\n", - "-rw-r--r-- 1 root root 4854279 Jul 17 11:30 dev-v1.1.json\n", - "-rw-r--r-- 1 root root 4370528 Jul 17 11:30 dev-v2.0.json\n", - "-rw-r--r-- 1 root root 1452 Jul 17 11:30 download.py\n", - "drwxr-xr-x 2 root root 4096 Jul 17 11:30 examples\n", - "drwxr-xr-x 8 root root 4096 Jul 17 11:30 .git\n", - "drwxr-xr-x 3 root root 4096 Jul 17 11:30 .github\n", - "-rw-r--r-- 1 root root 1375 Jul 17 11:30 .gitignore\n", - "-rw-r--r-- 1 root root 11356 Jul 17 11:30 LICENSE\n", - "-rw-r--r-- 1 root root 40 Jul 17 11:30 MANIFEST.in\n", - "-rw-r--r-- 1 root root 12855 Jul 17 11:30 README.md\n", - "-rw-r--r-- 1 root root 129 Jul 17 11:30 requirements.txt\n", - "-rw-r--r-- 1 root root 727 Jul 17 11:30 setup.py\n", - "drwxr-xr-x 2 root root 4096 Jul 17 11:30 tests\n", - "-rw-r--r-- 1 root root 30288272 Jul 17 11:30 train-v1.1.json\n", - "-rw-r--r-- 1 root root 42123633 Jul 17 11:30 train-v2.0.json\n", - "-rw-r--r-- 1 root root 312 Jul 17 11:30 .travis.yml\n" + "drwxr-xr-x 8 root root 4096 Jul 18 08:52 .\n", + "drwxr-xr-x 1 root root 4096 Jul 18 08:52 ..\n", + "-rw-r--r-- 1 root root 815 Jul 18 08:52 api.py\n", + "-rw-r--r-- 1 root root 22 Jul 18 08:52 apt.txt\n", + "drwxr-xr-x 6 root root 4096 Jul 18 08:52 cdqa\n", + "drwxr-xr-x 2 root root 4096 Jul 18 08:52 
cdqa.egg-info\n", + "-rw-r--r-- 1 root root 4854279 Jul 18 08:52 dev-v1.1.json\n", + "-rw-r--r-- 1 root root 4370528 Jul 18 08:52 dev-v2.0.json\n", + "-rw-r--r-- 1 root root 1452 Jul 18 08:52 download.py\n", + "drwxr-xr-x 2 root root 4096 Jul 18 08:52 examples\n", + "drwxr-xr-x 8 root root 4096 Jul 18 08:52 .git\n", + "drwxr-xr-x 3 root root 4096 Jul 18 08:52 .github\n", + "-rw-r--r-- 1 root root 1375 Jul 18 08:52 .gitignore\n", + "-rw-r--r-- 1 root root 11356 Jul 18 08:52 LICENSE\n", + "-rw-r--r-- 1 root root 40 Jul 18 08:52 MANIFEST.in\n", + "-rw-r--r-- 1 root root 12855 Jul 18 08:52 README.md\n", + "-rw-r--r-- 1 root root 129 Jul 18 08:52 requirements.txt\n", + "-rw-r--r-- 1 root root 727 Jul 18 08:52 setup.py\n", + "drwxr-xr-x 2 root root 4096 Jul 18 08:52 tests\n", + "-rw-r--r-- 1 root root 30288272 Jul 18 08:52 train-v1.1.json\n", + "-rw-r--r-- 1 root root 42123633 Jul 18 08:52 train-v2.0.json\n", + "-rw-r--r-- 1 root root 312 Jul 18 08:52 .travis.yml\n" ], "name": "stdout" } @@ -263,10 +263,10 @@ }, "id": "umJkmO9HFf3L", "colab_type": "code", - "outputId": "bafedcb2-6bf2-4282-8898-485e13622922", + "outputId": "7baca070-08ae-49f4-c464-50bc3c40b741", "colab": { "base_uri": "https://localhost:8080/", - "height": 75 + "height": 76 } }, "source": [ @@ -292,10 +292,10 @@ "metadata": { "id": "ylorIsqLz_J3", "colab_type": "code", - "outputId": "fb597f7d-ba49-4bc9-826b-1cd855c399ed", + "outputId": "0d0cd98f-f44c-4e3d-eaf1-caef8aeff290", "colab": { "base_uri": "https://localhost:8080/", - "height": 247 + "height": 230 } }, "source": [ @@ -306,7 +306,7 @@ { "output_type": "stream", "text": [ - "--2019-07-17 11:31:03-- https://raw.githubusercontent.com/huggingface/pytorch-transformers/master/examples/tests_samples/SQUAD/dev-v2.0-small.json\n", + "--2019-07-18 08:53:01-- https://raw.githubusercontent.com/huggingface/pytorch-transformers/master/examples/tests_samples/SQUAD/dev-v2.0-small.json\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", @@ -315,7 +315,7 @@ "\n", "\rdev-v2.0-small.json 0%[ ] 0 --.-KB/s \rdev-v2.0-small.json 100%[===================>] 8.58K --.-KB/s in 0s \n", "\n", - "2019-07-17 11:31:03 (166 MB/s) - ‘dev-v2.0-small.json’ saved [8786/8786]\n", + "2019-07-18 08:53:01 (92.6 MB/s) - ‘dev-v2.0-small.json’ saved [8786/8786]\n", "\n" ], "name": "stdout" @@ -327,11 +327,11 @@ "metadata": { "id": "sMRDXXFdaO7z", "colab_type": "code", + "outputId": "e921c11f-70ce-4cfb-9519-131692dc787e", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 - }, - "outputId": "ec2ee20c-76ae-495b-b3df-81eb61db52c1" + } }, "source": [ "!cat dev-v2.0-small.json" @@ -491,7 +491,7 @@ "metadata": { "id": "owyoli60qGb9", "colab_type": "code", - "outputId": "5551e4b9-7556-49cd-83ca-f112a0aadc0d", + "outputId": "ef6f4e1c-a4d6-42c8-ef4b-970e633389ec", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 @@ -510,14 +510,14 @@ { "output_type": "stream", "text": [ - "07/17/2019 11:31:18 - WARNING - cdqa.reader.reader_sklearn - Process rank: -1, device: cpu, n_gpu: 1, distributed training: False, 16-bits training: False\n", - "07/17/2019 11:31:19 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json not found in cache, downloading to /tmp/tmpe6r5d8ur\n", - "100%|██████████| 641/641 [00:00<00:00, 123844.90B/s]\n", - "07/17/2019 11:31:19 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpe6r5d8ur to cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", - "07/17/2019 11:31:19 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", - "07/17/2019 11:31:19 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmpe6r5d8ur\n", - "07/17/2019 11:31:19 - INFO - pytorch_transformers.modeling_utils - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json from cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", - "07/17/2019 11:31:19 - INFO - pytorch_transformers.modeling_utils - Model config {\n", + "07/18/2019 08:53:10 - WARNING - cdqa.reader.reader_sklearn - Process rank: -1, device: cpu, n_gpu: 1, distributed training: False, 16-bits training: False\n", + "07/18/2019 08:53:10 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json not found in cache, downloading to /tmp/tmpo_7tn6k6\n", + "100%|██████████| 641/641 [00:00<00:00, 372581.61B/s]\n", + "07/18/2019 08:53:11 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpo_7tn6k6 to cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/18/2019 08:53:11 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/18/2019 08:53:11 - INFO - pytorch_transformers.file_utils - removing temp file 
/tmp/tmpo_7tn6k6\n", + "07/18/2019 08:53:11 - INFO - pytorch_transformers.modeling_utils - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json from cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/18/2019 08:53:11 - INFO - pytorch_transformers.modeling_utils - Model config {\n", " \"attn_type\": \"bi\",\n", " \"bi_data\": false,\n", " \"clamp_len\": -1,\n", @@ -548,21 +548,21 @@ " \"untie_r\": true\n", "}\n", "\n", - "07/17/2019 11:31:20 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model not found in cache, downloading to /tmp/tmpn1g639tv\n", - "100%|██████████| 798011/798011 [00:01<00:00, 597312.57B/s]\n", - "07/17/2019 11:31:23 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpn1g639tv to cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", - "07/17/2019 11:31:23 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", - "07/17/2019 11:31:23 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmpn1g639tv\n", - "07/17/2019 11:31:23 - INFO - pytorch_transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", - "07/17/2019 11:31:24 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin not found in cache, downloading to /tmp/tmpcusodobh\n", - "100%|██████████| 467042463/467042463 [00:46<00:00, 10091679.98B/s]\n", - "07/17/2019 11:32:11 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpcusodobh to cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", - "07/17/2019 11:32:13 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", - "07/17/2019 11:32:13 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmpcusodobh\n", - "07/17/2019 11:32:13 - INFO - pytorch_transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin from cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", - "07/17/2019 11:32:18 - INFO - pytorch_transformers.modeling_utils - Weights of XLNetForQuestionAnswering not initialized from pretrained model: ['start_logits.dense.weight', 'start_logits.dense.bias', 'end_logits.dense_0.weight', 'end_logits.dense_0.bias', 'end_logits.LayerNorm.weight', 'end_logits.LayerNorm.bias', 'end_logits.dense_1.weight', 
'end_logits.dense_1.bias', 'answer_class.dense_0.weight', 'answer_class.dense_0.bias', 'answer_class.dense_1.weight']\n", - "07/17/2019 11:32:18 - INFO - pytorch_transformers.modeling_utils - Weights from pretrained model not used in XLNetForQuestionAnswering: ['lm_loss.weight', 'lm_loss.bias']\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Training/evaluation parameters Reader(adam_epsilon=1e-08, cache_dir='', config_name='', do_eval=None,\n", + "07/18/2019 08:53:12 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model not found in cache, downloading to /tmp/tmpj2n7ud_j\n", + "100%|██████████| 798011/798011 [00:00<00:00, 892469.28B/s]\n", + "07/18/2019 08:53:13 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpj2n7ud_j to cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/18/2019 08:53:13 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/18/2019 08:53:13 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmpj2n7ud_j\n", + "07/18/2019 08:53:13 - INFO - pytorch_transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/18/2019 08:53:14 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin not found in cache, downloading to /tmp/tmplb24yv4w\n", + "100%|██████████| 467042463/467042463 [00:36<00:00, 12855738.60B/s]\n", + "07/18/2019 08:53:51 - INFO - pytorch_transformers.file_utils - copying /tmp/tmplb24yv4w to cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", + "07/18/2019 08:53:53 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", + "07/18/2019 08:53:53 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmplb24yv4w\n", + "07/18/2019 08:53:53 - INFO - pytorch_transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin from cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", + "07/18/2019 08:53:58 - INFO - pytorch_transformers.modeling_utils - Weights of XLNetForQuestionAnswering not initialized from pretrained model: ['start_logits.dense.weight', 'start_logits.dense.bias', 'end_logits.dense_0.weight', 'end_logits.dense_0.bias', 'end_logits.LayerNorm.weight', 'end_logits.LayerNorm.bias', 'end_logits.dense_1.weight', 'end_logits.dense_1.bias', 'answer_class.dense_0.weight', 'answer_class.dense_0.bias', 'answer_class.dense_1.weight']\n", + "07/18/2019 08:53:58 - INFO - 
pytorch_transformers.modeling_utils - Weights from pretrained model not used in XLNetForQuestionAnswering: ['lm_loss.weight', 'lm_loss.bias']\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Training/evaluation parameters Reader(adam_epsilon=1e-08, cache_dir='', config_name='', do_eval=None,\n", " do_lower_case=True, do_train=None, doc_stride=128,\n", " eval_all_checkpoints=True, evaluate_during_training=True, fp16=False,\n", " fp16_opt_level='O1', gradient_accumulation_steps=1, learning_rate=5e-05,\n", @@ -582,7 +582,7 @@ "metadata": { "id": "24eT2nuKtrqp", "colab_type": "code", - "outputId": "f15bc136-52d0-40c0-c013-c97a4623a804", + "outputId": "bf9c82fa-677f-4772-f5c6-2d6f22b5f39a", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 @@ -596,200 +596,200 @@ { "output_type": "stream", "text": [ - "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at dev-v2.0-small.json\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁in ▁what ▁country ▁is ▁nor man dy ▁located ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . 
[SEP]\n",
[verbose convert_examples_to_features logging elided for readability: for each of the nine features (example_index 0-8, all logged at 07/17/2019 11:32:18 by cdqa.reader.utils_squad, all with doc_span_index 0) the notebook dumped the full tokens, token_to_orig_map, token_is_max_context, input_ids, input_mask and segment_ids arrays. The per-feature results were:
  unique_id 1000000000 (example_index 0): start_position: 61, end_position: 63, answer: ▁ franc e
  unique_id 1000000001 ("when were the normans in normandy?"): start_position: 46, end_position: 51, answer: ▁10 th ▁and ▁11 th ▁centuries
  unique_id 1000000002 ("from which countries did the norse originate?"): start_position: 91, end_position: 99, answer: ▁ den mark , ▁ice land ▁and ▁nor way
  unique_id 1000000003 ("who did king charles iii swear fealty to?"): impossible example
  unique_id 1000000004 ("when did the frankish identity emerge?"): impossible example
  unique_id 1000000005 ("who was the duke in the battle of hastings?"): start_position: 246, end_position: 251, answer: ▁ william ▁the ▁con quer or
  unique_id 1000000006 ("what principality did william the conquerer found?"): impossible example
  unique_id 1000000007 ("what branch of theoretical computer science deals with broadly classifying computational problems by difficulty and class of relationship?"): start_position: 22, end_position: 24, answer: ▁computational ▁complexity ▁theory
Two short sketches of how these arrays are built follow, and the final feature (unique_id 1000000008) is kept below in full as a representative sample.]
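A minimal sketch, in plain Python, of how the four arrays summarized above fit together. This is an illustration only, not the utils_squad implementation: the helper name featurize and its first-seen toy vocabulary are hypothetical, real tokenization is sub-word (so token_to_orig_map sends several pieces to one paragraph word), and the real code additionally computes token_is_max_context, answer positions and the impossible flag. The special-token ids default to 0 only because that is how [CLS] and [SEP] happen to be logged above.

def featurize(question_tokens, doc_tokens, max_seq_length=384,
              cls_id=0, sep_id=0, pad_id=0):
    """Build input_ids / input_mask / segment_ids / token_to_orig_map for one
    (question, paragraph) pair, laid out as in the logs above:
    [CLS] question [SEP] paragraph [SEP] followed by zero padding."""
    vocab = {}

    def ids(tokens):
        # Toy vocabulary: assign an id the first time a token is seen.
        return [vocab.setdefault(t, len(vocab) + 1) for t in tokens]

    q_ids, d_ids = ids(question_tokens), ids(doc_tokens)
    input_ids = [cls_id] + q_ids + [sep_id] + d_ids + [sep_id]

    # Paragraph tokens start after [CLS], the question and the first [SEP];
    # token_to_orig_map is defined only on them (hence maps like "11:0 12:1 ...").
    offset = len(q_ids) + 2
    token_to_orig_map = {offset + i: i for i in range(len(d_ids))}

    # segment_ids: 0 over [CLS]+question+[SEP], 1 over paragraph+[SEP];
    # input_mask: 1 over real tokens, 0 over padding.
    segment_ids = [0] * offset + [1] * (len(d_ids) + 1)
    input_mask = [1] * len(input_ids)
    while len(input_ids) < max_seq_length:
        input_ids.append(pad_id)
        input_mask.append(0)
        segment_ids.append(0)

    return input_ids, input_mask, segment_ids, token_to_orig_map

Run on the last feature's question ("▁what ▁is ▁a ▁manual ▁application ▁of ▁mathematical ▁steps ?", nine pieces), this reproduces the shapes logged for unique_id 1000000008: eleven leading zeros in segment_ids and a token_to_orig_map that starts at position 11.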
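Every token_is_max_context entry in these logs is True because each paragraph fits in a single doc span (doc_span_index is 0 throughout). When a paragraph exceeds the sequence budget, convert_examples_to_features slides a window over it with a doc_stride, so a token can fall in several overlapping spans; it is marked max-context only in the span where it has the most surrounding context, and predictions are taken only from that span. A sketch of the rule, paraphrased from the standard BERT-style SQuAD utilities (a span's score is min(left context, right context) plus a small bias toward longer spans):

def check_is_max_context(doc_spans, cur_span_index, position):
    # doc_spans: list of (start, length) windows over the paragraph tokens.
    best_score, best_span_index = None, None
    for span_index, (start, length) in enumerate(doc_spans):
        end = start + length - 1
        if position < start or position > end:
            continue
        num_left_context = position - start
        num_right_context = end - position
        score = min(num_left_context, num_right_context) + 0.01 * length
        if best_score is None or score > best_score:
            best_score, best_span_index = score, span_index
    return cur_span_index == best_span_index

# Two overlapping 128-token spans with stride 64: token 100 sits in both,
# but has more surrounding context in the second span, so only that span keeps it.
spans = [(0, 128), (64, 128)]
print(check_is_max_context(spans, 0, 100))  # False
print(check_is_max_context(spans, 1, 100))  # True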
[representative full dump, unique_id 1000000008:]
- "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000008\n",
- "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 8\n",
- "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁is ▁a ▁manual ▁application ▁of ▁mathematical ▁steps ? [SEP] ▁computational ▁complexity ▁theory ▁is ▁a ▁branch ▁of ▁the ▁theory ▁of ▁computation ▁in ▁theoretical ▁computer ▁science ▁that ▁focuses ▁on ▁classify ing ▁computational ▁problems ▁according ▁to ▁their ▁inherent ▁difficulty , ▁and ▁relating ▁those ▁classes ▁to ▁each ▁other . ▁a ▁computational ▁problem ▁is ▁understood ▁to ▁be ▁a ▁task ▁that ▁is ▁in ▁principle ▁a men able ▁to ▁being ▁solved ▁by ▁a ▁computer , ▁which ▁is ▁equivalent ▁to ▁stating ▁that ▁the ▁problem ▁may ▁be ▁solved ▁by ▁mechanical ▁application ▁of ▁mathematical ▁steps , ▁such ▁as ▁an ▁algorithm . 
[SEP]\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:13 25:14 26:15 27:16 28:17 29:18 30:18 31:19 32:20 33:21 34:22 35:23 36:24 37:25 38:25 39:26 40:27 41:28 42:29 43:30 44:31 45:32 46:32 47:33 48:34 49:35 50:36 51:37 52:38 53:39 54:40 55:41 56:42 57:43 58:44 59:45 60:46 61:46 62:46 63:47 64:48 65:49 66:50 67:51 68:52 69:52 70:53 71:54 72:55 73:56 74:57 75:58 76:59 77:60 78:61 79:62 80:63 81:64 82:65 83:66 84:67 85:68 86:69 87:69 88:70 89:71 90:72 91:73 92:73\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 27 24 6403 1479 20 12956 2094 82 0 23228 11906 2818 27 24 3709 20 18 2818 20 27686 25 13109 920 1767 29 7712 31 27871 56 23228 708 549 22 58 16507 6157 19 21 7376 186 2814 22 231 86 9 24 23228 662 27 4950 22 39 24 2578 29 27 25 4926 24 1126 386 22 163 12567 37 24 920 19 59 27 4682 22 8033 29 18 662 132 39 12567 37 7820 1479 20 12956 2094 19 148 34 48 13301 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - impossible example\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000009\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 9\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁measure ▁of ▁a ▁computational ▁problem ▁broadly ▁defines ▁the ▁inherent ▁difficulty ▁of ▁the ▁solution ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
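
The "impossible example" lines mark SQuAD v2.0 questions whose answer does not occur in the passage, which is why no start_position/end_position/answer triple is logged for them. For such features the span target cannot point into the context; the usual convention in the HF SQuAD utilities is to aim both positions at the [CLS] token, which is what this sketch assumes (cls_index = 0 matches the dumps above, where [CLS] is the first token):

def span_targets(start_position, end_position, is_impossible, cls_index=0):
    # Unanswerable questions train the model to put the span on [CLS],
    # which at prediction time is read back as "no answer".
    if is_impossible:
        return cls_index, cls_index
    return start_position, end_position
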
[SEP]\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 17:0 18:1 19:2 20:3 21:4 22:5 23:6 24:7 25:8 26:9 27:10 28:11 29:12 30:12 31:13 32:14 33:15 34:16 35:16 36:17 37:18 38:19 39:19 40:20 41:21 42:21 43:22 44:23 45:24 46:25 47:26 48:27 49:28 50:29 51:30 52:31 53:32 54:33 55:33 56:34 57:35 58:36 59:37 60:38 61:39 62:40 63:41 64:41 65:42 66:43 67:44 68:45 69:46 70:46 71:47 72:48 73:49 74:50 75:51 76:52 77:52 78:53 79:54 80:55 81:56 82:57 83:58 84:59 85:59 86:59 87:60 88:61 89:62 90:62 91:62 92:63 93:64 94:65 95:66 96:67 97:68 98:69 99:70 100:70 101:70 102:71 103:72 104:73 105:73 106:74 107:75 108:76 109:77 110:78 111:79 112:79 113:79 114:80 115:81 116:82 117:82 118:82 119:83 120:84 121:85 122:86 123:87 124:88 125:89 126:90 127:91 128:92 129:93 130:94 131:95 132:96 133:97 134:98 135:99 136:100 137:101 138:102 139:103 140:103\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2310 20 24 23228 662 16026 14668 18 16507 6157 20 18 1938 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - start_position: 24\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - end_position: 29\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - answer: ▁if ▁its ▁solution ▁requires ▁significant ▁resources\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000010\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 10\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁method ▁is ▁used ▁to ▁in tu itive ly ▁assess ▁or ▁quantify ▁the ▁amount ▁of ▁resources ▁required ▁to ▁solve ▁a ▁computational ▁problem ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 25:0 26:1 27:2 28:3 29:4 30:5 31:6 32:7 33:8 34:9 35:10 36:11 37:12 38:12 39:13 40:14 41:15 42:16 43:16 44:17 45:18 46:19 47:19 48:20 49:21 50:21 51:22 52:23 53:24 54:25 55:26 56:27 57:28 58:29 59:30 60:31 61:32 62:33 63:33 64:34 65:35 66:36 67:37 68:38 69:39 70:40 71:41 72:41 73:42 74:43 75:44 76:45 77:46 78:46 79:47 80:48 81:49 82:50 83:51 84:52 85:52 86:53 87:54 88:55 89:56 90:57 91:58 92:59 93:59 94:59 95:60 96:61 97:62 98:62 99:62 100:63 101:64 102:65 103:66 104:67 105:68 106:69 107:70 108:70 109:70 110:71 111:72 112:73 113:73 114:74 115:75 116:76 117:77 118:78 119:79 120:79 121:79 122:80 123:81 124:82 125:82 126:82 127:83 128:84 129:85 130:86 131:87 132:88 133:89 134:90 135:91 136:92 137:93 138:94 139:95 140:96 141:97 142:98 143:99 144:100 145:101 146:102 147:103 148:103\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2175 27 179 22 25 2853 9736 111 7329 49 30299 18 1065 20 1485 978 22 4929 24 23228 662 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - start_position: 53\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - end_position: 56\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - answer: ▁mathematical ▁models ▁of ▁computation\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000011\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 11\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁are ▁two ▁basic ▁primary ▁resources ▁used ▁to ▁ gu age ▁complexity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
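
The ▁ prefix in the token dumps is the SentencePiece word-boundary marker, and a split like "▁ gu age" shows the tokenizer faithfully segmenting the dataset's own typo "guage" rather than correcting it. The vocabulary is xlnet-base-cased (see the cached-features file named further down), and the lowercase pieces suggest lowercasing was enabled for this run. Reproducing such a dump takes a few lines with pytorch_transformers; treat the exact pieces as illustrative, since they depend on the pretrained vocab:

from pytorch_transformers import XLNetTokenizer

# do_lower_case=True is an assumption inferred from the lowercase dumps above
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=True)
tokens = tokenizer.tokenize("What are two basic primary resources used to guage complexity?")
# -> ['▁what', '▁are', '▁two', ..., '▁', 'gu', 'age', '▁complexity', '?']
input_ids = tokenizer.convert_tokens_to_ids(tokens)
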
[SEP]\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:1 17:2 18:3 19:4 20:5 21:6 22:7 23:8 24:9 25:10 26:11 27:12 28:12 29:13 30:14 31:15 32:16 33:16 34:17 35:18 36:19 37:19 38:20 39:21 40:21 41:22 42:23 43:24 44:25 45:26 46:27 47:28 48:29 49:30 50:31 51:32 52:33 53:33 54:34 55:35 56:36 57:37 58:38 59:39 60:40 61:41 62:41 63:42 64:43 65:44 66:45 67:46 68:46 69:47 70:48 71:49 72:50 73:51 74:52 75:52 76:53 77:54 78:55 79:56 80:57 81:58 82:59 83:59 84:59 85:60 86:61 87:62 88:62 89:62 90:63 91:64 92:65 93:66 94:67 95:68 96:69 97:70 98:70 99:70 100:71 101:72 102:73 103:73 104:74 105:75 106:76 107:77 108:78 109:79 110:79 111:79 112:80 113:81 114:82 115:82 116:82 117:83 118:84 119:85 120:86 121:87 122:88 123:89 124:90 125:91 126:92 127:93 128:94 129:95 130:96 131:97 132:98 133:99 134:100 135:101 136:102 137:103 138:103\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 41 87 1949 1827 1485 179 22 17 3017 981 11906 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - start_position: 65\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - end_position: 67\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - answer: ▁time ▁and ▁storage\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000012\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 12\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁unit ▁is ▁measured ▁to ▁determine ▁circuit ▁simplicity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
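
token_is_max_context is uniformly True in these dumps because every paragraph fits inside a single 384-token span. With longer documents, convert_examples_to_features slides a window over the context (stepping by doc_stride), the same token can land in several overlapping spans, and the flag records the span in which that token has the most surrounding context. The scoring rule, reproduced from the standard BERT/HF SQuAD utilities:

import collections

DocSpan = collections.namedtuple("DocSpan", ["start", "length"])

def check_is_max_context(doc_spans, cur_span_index, position):
    # score = min(left context, right context) + 0.01 * span length;
    # the token "belongs" to the span where this score is highest
    best_score, best_span_index = None, None
    for span_index, doc_span in enumerate(doc_spans):
        end = doc_span.start + doc_span.length - 1
        if position < doc_span.start or position > end:
            continue
        num_left_context = position - doc_span.start
        num_right_context = end - position
        score = min(num_left_context, num_right_context) + 0.01 * doc_span.length
        if best_score is None or score > best_score:
            best_score, best_span_index = score, span_index
    return cur_span_index == best_span_index

spans = [DocSpan(start=0, length=384), DocSpan(start=128, length=384)]
check_is_max_context(spans, 0, 350)   # False: token 350 sits near the end of span 0
check_is_max_context(spans, 1, 350)   # True: it is better centered in span 1
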
[SEP]\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:12 25:13 26:14 27:15 28:16 29:16 30:17 31:18 32:19 33:19 34:20 35:21 36:21 37:22 38:23 39:24 40:25 41:26 42:27 43:28 44:29 45:30 46:31 47:32 48:33 49:33 50:34 51:35 52:36 53:37 54:38 55:39 56:40 57:41 58:41 59:42 60:43 61:44 62:45 63:46 64:46 65:47 66:48 67:49 68:50 69:51 70:52 71:52 72:53 73:54 74:55 75:56 76:57 77:58 78:59 79:59 80:59 81:60 82:61 83:62 84:62 85:62 86:63 87:64 88:65 89:66 90:67 91:68 92:69 93:70 94:70 95:70 96:71 97:72 98:73 99:73 100:74 101:75 102:76 103:77 104:78 105:79 106:79 107:79 108:80 109:81 110:82 111:82 112:82 113:83 114:84 115:85 116:86 117:87 118:88 119:89 120:90 121:91 122:92 123:93 124:94 125:95 126:96 127:97 128:98 129:99 130:100 131:101 132:102 133:103 134:103\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 1591 27 7375 22 2081 5034 18950 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - impossible example\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - unique_id: 1000000013\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - example_index: 13\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁number ▁is ▁used ▁in ▁perpendicular ▁computing ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7 18:8 19:9 20:10 21:11 22:12 23:12 24:13 25:14 26:15 27:16 28:16 29:17 30:18 31:19 32:19 33:20 34:21 35:21 36:22 37:23 38:24 39:25 40:26 41:27 42:28 43:29 44:30 45:31 46:32 47:33 48:33 49:34 50:35 51:36 52:37 53:38 54:39 55:40 56:41 57:41 58:42 59:43 60:44 61:45 62:46 63:46 64:47 65:48 66:49 67:50 68:51 69:52 70:52 71:53 72:54 73:55 74:56 75:57 76:58 77:59 78:59 79:59 80:60 81:61 82:62 83:62 84:62 85:63 86:64 87:65 88:66 89:67 90:68 91:69 92:70 93:70 94:70 95:71 96:72 97:73 98:73 99:74 100:75 101:76 102:77 103:78 104:79 105:79 106:79 107:80 108:81 109:82 110:82 111:82 112:83 113:84 114:85 115:86 116:87 117:88 118:89 119:90 120:91 121:92 122:93 123:94 124:95 125:96 126:97 127:98 128:99 129:100 130:101 131:102 132:103 133:103\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - token_is_max_context: 10:True 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 243 27 179 25 30525 9848 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.utils_squad - impossible example\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Saving features into cached file cached_train_xlnet-base-cased_384\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - ***** Running training *****\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Num examples = 14\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Num Epochs = 3\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Instantaneous batch size per GPU = 8\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Total train batch size (w. parallel, distributed & accumulation) = 8\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Gradient Accumulation steps = 1\n", - "07/17/2019 11:32:18 - INFO - cdqa.reader.reader_sklearn - Total optimization steps = 6\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at dev-v2.0-small.json\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁in ▁what ▁country ▁is ▁nor man dy ▁located ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . 
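
The training summary logged above is internally consistent: 14 features at a per-GPU batch size of 8 give ceil(14 / 8) = 2 batches per epoch, and with a gradient-accumulation step of 1 that is 2 optimizer steps per epoch, so 3 epochs yield the reported 6 total optimization steps. (The features are also saved to cached_train_xlnet-base-cased_384, so later runs can skip the conversion.) The same bookkeeping, sketched under the assumption of the len(train_dataloader)-based formula used in run_squad:

import math

num_features, batch_size, epochs, grad_accum = 14, 8, 3, 1
steps_per_epoch = math.ceil(num_features / batch_size)   # len(train_dataloader) == 2
t_total = steps_per_epoch // grad_accum * epochs          # 2 // 1 * 3 == 6
assert t_total == 6                                       # matches the log line above
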
▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:1 14:1 15:2 16:2 17:2 18:2 19:2 20:3 21:3 22:3 23:3 24:3 25:3 26:4 27:4 28:5 29:5 30:5 31:5 32:6 33:6 34:6 35:7 36:7 37:7 38:8 39:9 40:10 41:11 42:12 43:13 44:14 45:14 46:15 47:16 48:16 49:17 50:18 51:19 52:20 53:21 54:22 55:22 56:22 57:22 58:23 59:24 60:25 61:26 62:26 63:26 64:26 65:27 66:28 67:29 68:30 69:31 70:31 71:32 72:32 73:32 74:32 75:32 76:32 77:33 78:34 79:35 80:35 81:35 82:35 83:35 84:35 85:35 86:36 87:36 88:37 89:38 90:39 91:40 92:40 93:40 94:40 95:41 96:41 97:42 98:43 99:43 100:44 101:44 102:45 103:46 104:47 105:48 106:48 107:48 108:49 109:50 110:51 111:52 112:52 113:52 114:53 115:54 116:55 117:55 118:55 119:56 120:56 121:57 122:58 123:59 124:59 125:59 126:59 127:60 128:61 129:62 130:63 131:64 132:65 133:66 134:67 135:68 136:69 137:69 138:70 139:71 140:71 141:71 142:71 143:71 144:71 145:72 146:72 147:73 148:74 149:75 150:76 151:77 152:78 153:79 154:80 155:80 156:80 157:80 158:80 159:81 160:82 161:83 162:84 163:84 164:84 165:84 166:85 167:86 168:87 169:88 170:89 171:90 172:91 173:92 174:93 175:93 176:93 177:94 178:95 179:96 180:97 181:98 182:99 183:100 184:101 185:102 186:102 187:103 188:103 189:104 190:105 191:106 192:107 193:108 194:109 195:110 196:111 197:112 198:112\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 25 113 234 27 2387 249 2087 798 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 
12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 61\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 63\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁ franc e\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000001\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 1\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁were ▁the ▁nor man s ▁in ▁nor man dy ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . 
▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 13:0 14:1 15:1 16:1 17:2 18:2 19:2 20:2 21:2 22:3 23:3 24:3 25:3 26:3 27:3 28:4 29:4 30:5 31:5 32:5 33:5 34:6 35:6 36:6 37:7 38:7 39:7 40:8 41:9 42:10 43:11 44:12 45:13 46:14 47:14 48:15 49:16 50:16 51:17 52:18 53:19 54:20 55:21 56:22 57:22 58:22 59:22 60:23 61:24 62:25 63:26 64:26 65:26 66:26 67:27 68:28 69:29 70:30 71:31 72:31 73:32 74:32 75:32 76:32 77:32 78:32 79:33 80:34 81:35 82:35 83:35 84:35 85:35 86:35 87:35 88:36 89:36 90:37 91:38 92:39 93:40 94:40 95:40 96:40 97:41 98:41 99:42 100:43 101:43 102:44 103:44 104:45 105:46 106:47 107:48 108:48 109:48 110:49 111:50 112:51 113:52 114:52 115:52 116:53 117:54 118:55 119:55 120:55 121:56 122:56 123:57 124:58 125:59 126:59 127:59 128:59 129:60 130:61 131:62 132:63 133:64 134:65 135:66 136:67 137:68 138:69 139:69 140:70 141:71 142:71 143:71 144:71 145:71 146:71 147:72 148:72 149:73 150:74 151:75 152:76 153:77 154:78 155:79 156:80 157:80 158:80 159:80 160:80 161:81 162:82 163:83 164:84 165:84 166:84 167:84 168:85 169:86 170:87 171:88 172:89 173:90 174:91 175:92 176:93 177:93 178:93 179:94 180:95 181:96 182:97 183:98 184:99 185:100 186:101 187:102 188:102 189:103 190:103 191:104 192:105 193:106 194:107 195:108 196:109 197:110 198:111 199:112 200:112\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 
184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 90 55 18 2387 249 23 25 2387 249 2087 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 46\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 51\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁10 th ▁and ▁11 th ▁centuries\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000002\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - 
example_index: 2\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁from ▁which ▁countries ▁did ▁the ▁nor se ▁originate ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:1 14:1 15:2 16:2 17:2 18:2 19:2 20:3 21:3 22:3 23:3 24:3 25:3 26:4 27:4 28:5 29:5 30:5 31:5 32:6 33:6 34:6 35:7 36:7 37:7 38:8 39:9 40:10 41:11 42:12 43:13 44:14 45:14 46:15 47:16 48:16 49:17 50:18 51:19 52:20 53:21 54:22 55:22 56:22 57:22 58:23 59:24 60:25 61:26 62:26 63:26 64:26 65:27 66:28 67:29 68:30 69:31 70:31 71:32 72:32 73:32 74:32 75:32 76:32 77:33 78:34 79:35 80:35 81:35 82:35 83:35 84:35 85:35 86:36 87:36 88:37 89:38 90:39 91:40 92:40 93:40 94:40 95:41 96:41 97:42 98:43 99:43 100:44 101:44 102:45 103:46 104:47 105:48 106:48 107:48 108:49 109:50 110:51 111:52 112:52 113:52 114:53 115:54 116:55 117:55 118:55 119:56 120:56 121:57 122:58 123:59 124:59 125:59 126:59 127:60 128:61 129:62 130:63 131:64 132:65 133:66 134:67 135:68 136:69 137:69 138:70 139:71 140:71 141:71 142:71 143:71 144:71 145:72 146:72 147:73 148:74 149:75 150:76 151:77 152:78 153:79 154:80 155:80 156:80 157:80 158:80 159:81 160:82 161:83 162:84 163:84 164:84 165:84 166:85 167:86 168:87 169:88 170:89 171:90 172:91 173:92 174:93 175:93 176:93 177:94 178:95 179:96 180:97 181:98 182:99 183:100 184:101 185:102 186:102 187:103 188:103 189:104 190:105 191:106 192:107 193:108 194:109 195:110 196:111 197:112 198:112\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 
132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 40 59 452 190 18 2387 1022 19788 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - 
cdqa.reader.utils_squad - start_position: 91\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 99\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁ den mark , ▁ice land ▁and ▁nor way\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000003\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 3\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁did ▁king ▁ char les ▁ iii ▁swear ▁fe al ty ▁to ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 16:0 17:1 18:1 19:1 20:2 21:2 22:2 23:2 24:2 25:3 26:3 27:3 28:3 29:3 30:3 31:4 32:4 33:5 34:5 35:5 36:5 37:6 38:6 39:6 40:7 41:7 42:7 43:8 44:9 45:10 46:11 47:12 48:13 49:14 50:14 51:15 52:16 53:16 54:17 55:18 56:19 57:20 58:21 59:22 60:22 61:22 62:22 63:23 64:24 65:25 66:26 67:26 68:26 69:26 70:27 71:28 72:29 73:30 74:31 75:31 76:32 77:32 78:32 79:32 80:32 81:32 82:33 83:34 84:35 85:35 86:35 87:35 88:35 89:35 90:35 91:36 92:36 93:37 94:38 95:39 96:40 97:40 98:40 99:40 100:41 101:41 102:42 103:43 104:43 105:44 106:44 107:45 108:46 109:47 110:48 111:48 112:48 113:49 114:50 115:51 116:52 117:52 118:52 119:53 120:54 121:55 122:55 123:55 124:56 125:56 126:57 127:58 128:59 129:59 130:59 131:59 132:60 133:61 134:62 135:63 136:64 137:65 138:66 139:67 140:68 141:69 142:69 143:70 144:71 145:71 146:71 147:71 148:71 149:71 150:72 151:72 152:73 153:74 154:75 155:76 156:77 157:78 158:79 159:80 160:80 161:80 162:80 163:80 164:81 165:82 166:83 167:84 168:84 169:84 170:84 171:85 172:86 173:87 174:88 175:89 176:90 177:91 178:92 179:93 180:93 181:93 182:94 183:95 184:96 185:97 186:98 187:99 188:100 189:101 190:102 191:102 192:103 193:103 194:104 195:105 196:106 197:107 198:108 199:109 200:110 201:111 202:112 203:112\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 
83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True 201:True 202:True 203:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 190 3351 17 6628 1890 17 28488 13650 9151 212 982 22 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000004\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 4\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁did ▁the ▁frank ish ▁identity ▁emerge ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . 
[SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 10:0 11:1 12:1 13:1 14:2 15:2 16:2 17:2 18:2 19:3 20:3 21:3 22:3 23:3 24:3 25:4 26:4 27:5 28:5 29:5 30:5 31:6 32:6 33:6 34:7 35:7 36:7 37:8 38:9 39:10 40:11 41:12 42:13 43:14 44:14 45:15 46:16 47:16 48:17 49:18 50:19 51:20 52:21 53:22 54:22 55:22 56:22 57:23 58:24 59:25 60:26 61:26 62:26 63:26 64:27 65:28 66:29 67:30 68:31 69:31 70:32 71:32 72:32 73:32 74:32 75:32 76:33 77:34 78:35 79:35 80:35 81:35 82:35 83:35 84:35 85:36 86:36 87:37 88:38 89:39 90:40 91:40 92:40 93:40 94:41 95:41 96:42 97:43 98:43 99:44 100:44 101:45 102:46 103:47 104:48 105:48 106:48 107:49 108:50 109:51 110:52 111:52 112:52 113:53 114:54 115:55 116:55 117:55 118:56 119:56 120:57 121:58 122:59 123:59 124:59 125:59 126:60 127:61 128:62 129:63 130:64 131:65 132:66 133:67 134:68 135:69 136:69 137:70 138:71 139:71 140:71 141:71 142:71 143:71 144:72 145:72 146:73 147:74 148:75 149:76 150:77 151:78 152:79 153:80 154:80 155:80 156:80 157:80 158:81 159:82 160:83 161:84 162:84 163:84 164:84 165:85 166:86 167:87 168:88 169:89 170:90 171:91 172:92 173:93 174:93 175:93 176:94 177:95 178:96 179:97 180:98 181:99 182:100 183:101 184:102 185:102 186:103 187:103 188:104 189:105 190:106 191:107 192:108 193:109 194:110 195:111 196:112 197:112\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 10:True 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 90 190 18 23675 1406 3643 7624 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 
7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000005\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 5\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁was ▁the ▁duke ▁in ▁the ▁battle ▁of ▁has ting s ? [SEP] ▁the ▁nor man ▁dynasty ▁had ▁a ▁major ▁political , ▁cultural ▁and ▁military ▁impact ▁on ▁medieval ▁euro pe ▁and ▁even ▁the ▁near ▁east . ▁the ▁nor man s ▁were ▁famed ▁for ▁their ▁martial ▁spirit ▁and ▁eventually ▁for ▁their ▁christian ▁pie ty , ▁becoming ▁ex ponent s ▁of ▁the ▁cat hol ic ▁or tho d oxy ▁into ▁which ▁they ▁assimilate d . ▁they ▁adopted ▁the ▁ gall o - rom ance ▁language ▁of ▁the ▁frank ish ▁land ▁they ▁settled , ▁their ▁dialect ▁becoming ▁known ▁as ▁nor man , ▁nor ma und ▁or ▁nor man ▁french , ▁an ▁important ▁literary ▁language . 
▁the ▁du chy ▁of ▁nor man dy , ▁which ▁they ▁formed ▁by ▁treaty ▁with ▁the ▁french ▁crown , ▁was ▁a ▁great ▁ fi ef ▁of ▁medieval ▁ franc e , ▁and ▁under ▁rich ard ▁ i ▁of ▁nor man dy ▁was ▁forged ▁into ▁a ▁cohesive ▁and ▁formidable ▁principal ity ▁in ▁feudal ▁tenure . ▁the ▁nor man s ▁are ▁noted ▁both ▁for ▁their ▁culture , ▁such ▁as ▁their ▁unique ▁ ro man esque ▁architecture ▁and ▁musical ▁traditions , ▁and ▁for ▁their ▁significant ▁military ▁accomplishments ▁and ▁innovations . ▁nor man ▁adventure rs ▁founded ▁the ▁kingdom ▁of ▁ s ici ly ▁under ▁ ro ger ▁ ii ▁after ▁con quer ing ▁southern ▁it aly ▁on ▁the ▁ s ara cen s ▁and ▁by zan tine s , ▁and ▁an ▁expedition ▁on ▁behalf ▁of ▁their ▁duke , ▁ william ▁the ▁con quer or , ▁led ▁to ▁the ▁nor man ▁conquest ▁of ▁ eng land ▁at ▁the ▁battle ▁of ▁has ting s ▁in ▁10 66 . ▁nor man ▁cultural ▁and ▁military ▁influence ▁spread ▁from ▁these ▁new ▁ european ▁centres ▁to ▁the ▁crusade r ▁states ▁of ▁the ▁near ▁east , ▁where ▁their ▁prince ▁ bo he mond ▁ i ▁founded ▁the ▁principal ity ▁of ▁anti och ▁in ▁the ▁ le vant , ▁to ▁ s cot land ▁and ▁ wal es ▁in ▁great ▁ bri tain , ▁to ▁ ire land , ▁and ▁to ▁the ▁coast s ▁of ▁north ▁a fri ca ▁and ▁the ▁can ary ▁islands . [SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 14:0 15:1 16:1 17:2 18:3 19:4 20:5 21:6 22:6 23:7 24:8 25:9 26:10 27:11 28:12 29:13 30:13 31:14 32:15 33:16 34:17 35:18 36:18 37:19 38:20 39:20 40:20 41:21 42:22 43:23 44:24 45:25 46:26 47:27 48:28 49:29 50:30 51:31 52:32 53:32 54:32 55:33 56:34 57:34 58:34 59:35 60:36 61:37 62:37 63:37 64:38 65:38 66:38 67:38 68:39 69:40 70:41 71:42 72:42 73:42 74:43 75:44 76:45 77:46 78:46 79:46 80:46 81:46 82:46 83:47 84:48 85:49 86:50 87:50 88:51 89:52 90:53 91:53 92:54 93:55 94:56 95:57 96:58 97:59 98:59 99:59 100:60 101:60 102:60 103:61 104:62 105:62 106:63 107:63 108:64 109:65 110:66 111:67 112:67 113:68 114:69 115:69 116:70 117:71 118:71 119:71 120:71 121:72 122:73 123:74 124:75 125:76 126:77 127:78 128:79 129:80 130:80 131:81 132:82 133:83 134:84 135:84 136:84 137:85 138:86 139:87 140:87 141:87 142:87 143:88 144:89 145:90 146:90 147:91 148:91 149:92 150:93 151:93 152:93 153:94 154:95 155:96 156:97 157:98 158:99 159:100 160:101 161:101 162:102 163:103 164:104 165:104 166:105 167:106 168:106 169:106 170:107 171:108 172:109 173:110 174:111 175:112 176:112 177:113 178:114 179:115 180:116 181:117 182:117 183:117 184:117 185:118 186:119 187:120 188:121 189:121 190:122 191:123 192:124 193:125 194:126 195:127 196:128 197:129 198:129 199:130 200:130 201:131 202:131 203:132 204:133 205:134 206:135 207:136 208:136 209:136 210:136 211:137 212:138 213:138 214:138 215:139 216:139 217:140 218:141 219:141 220:141 221:142 222:143 223:143 224:144 225:145 226:146 227:146 228:146 229:146 230:146 231:147 232:148 233:148 234:148 235:148 236:148 237:149 238:150 239:151 240:152 241:153 242:154 243:155 244:156 245:156 246:157 247:157 248:158 249:159 250:159 251:159 252:159 253:160 254:161 255:162 256:163 257:163 258:164 259:165 260:166 261:166 262:166 263:167 264:168 265:169 266:170 267:171 268:171 269:171 270:172 271:173 272:173 273:173 274:174 275:174 276:175 277:176 278:177 279:178 280:179 281:180 282:181 283:182 284:183 285:183 286:184 287:185 288:186 289:187 290:187 291:188 292:189 293:190 294:191 295:192 296:192 297:193 298:194 299:195 300:196 301:196 302:196 303:196 304:197 305:197 306:198 307:199 308:200 309:200 310:201 311:202 312:202 313:203 314:204 315:205 316:205 317:205 318:205 319:206 320:207 321:207 322:207 323:207 324:208 
325:209 326:209 327:209 328:210 329:211 330:212 331:212 332:212 333:212 334:213 335:214 336:214 337:214 338:214 339:215 340:216 341:217 342:218 343:218 344:219 345:220 346:221 347:221 348:221 349:222 350:223 351:224 352:224 353:225 354:225\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True 201:True 202:True 203:True 204:True 205:True 206:True 207:True 208:True 209:True 210:True 211:True 212:True 213:True 214:True 215:True 216:True 217:True 218:True 219:True 220:True 221:True 222:True 223:True 224:True 225:True 226:True 227:True 228:True 229:True 230:True 231:True 232:True 233:True 234:True 235:True 236:True 237:True 238:True 239:True 240:True 241:True 242:True 243:True 244:True 245:True 246:True 247:True 248:True 249:True 250:True 251:True 252:True 253:True 254:True 255:True 256:True 257:True 258:True 259:True 260:True 261:True 262:True 263:True 264:True 265:True 266:True 267:True 268:True 269:True 270:True 271:True 272:True 273:True 274:True 275:True 276:True 277:True 278:True 279:True 280:True 281:True 282:True 283:True 284:True 285:True 286:True 287:True 288:True 289:True 290:True 291:True 292:True 293:True 294:True 295:True 296:True 297:True 298:True 299:True 300:True 301:True 302:True 303:True 304:True 305:True 306:True 307:True 308:True 309:True 310:True 311:True 312:True 313:True 314:True 315:True 316:True 317:True 318:True 319:True 320:True 321:True 322:True 323:True 324:True 325:True 326:True 327:True 328:True 329:True 330:True 331:True 332:True 333:True 334:True 335:True 336:True 337:True 338:True 339:True 340:True 341:True 342:True 343:True 344:True 345:True 346:True 347:True 348:True 349:True 350:True 351:True 352:True 353:True 354:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 30 18 25950 25 18 1727 20 51 1203 23 82 0 18 2387 249 12765 54 24 383 413 19 2518 21 370 1585 31 10146 2926 1590 21 176 18 479 1646 9 18 2387 249 23 55 17447 28 58 12656 
3424 21 1707 28 58 31747 11703 982 19 1939 2002 16821 23 20 18 4777 7439 556 49 6684 66 11285 91 59 63 30218 66 9 63 3135 18 17 12353 155 13 14182 1789 1243 20 18 23675 1406 883 63 3602 19 58 13424 1939 318 34 2387 249 19 2387 661 5587 49 2387 249 29183 19 48 400 6957 1243 9 18 4626 8358 20 2387 249 2087 19 59 63 1851 37 4816 33 18 29183 6923 19 30 24 312 17 2265 4631 20 10146 17 12786 93 19 21 168 2628 1896 17 150 20 2387 249 2087 30 17163 91 24 30133 21 17868 3824 769 25 28893 8709 9 18 2387 249 23 41 1699 207 28 58 1799 19 148 34 58 1779 17 986 249 17660 4797 21 2985 8991 19 21 28 58 1376 370 17877 21 19273 9 2387 249 6693 1114 2118 18 5975 20 17 23 9620 111 168 17 986 2371 17 8343 99 2147 10792 56 1335 36 12229 31 18 17 23 3068 9593 23 21 37 10280 10115 23 19 21 48 8553 31 4399 20 58 25950 19 17 31499 18 2147 10792 218 19 687 22 18 2387 249 18093 20 17 5618 729 38 18 1727 20 51 1203 23 25 241 4126 9 2387 249 2518 21 370 2204 1912 40 166 109 17 30707 13348 22 18 21018 213 1035 20 18 479 1646 19 131 58 8434 17 1238 1438 7778 17 150 2118 18 3824 769 20 932 6892 25 18 17 529 11226 19 22 17 23 12982 729 21 17 9760 202 25 312 17 5365 3766 19 22 17 5294 729 19 21 22 18 2372 23 20 1012 24 5994 1346 21 18 64 1449 5852 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 246\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 251\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁ william ▁the ▁con quer or\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000006\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 6\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - 
doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁principal ity ▁did ▁ william ▁the ▁con quer er ▁found ? [SEP] ▁the ▁nor man ▁dynasty ▁had ▁a ▁major ▁political , ▁cultural ▁and ▁military ▁impact ▁on ▁medieval ▁euro pe ▁and ▁even ▁the ▁near ▁east . ▁the ▁nor man s ▁were ▁famed ▁for ▁their ▁martial ▁spirit ▁and ▁eventually ▁for ▁their ▁christian ▁pie ty , ▁becoming ▁ex ponent s ▁of ▁the ▁cat hol ic ▁or tho d oxy ▁into ▁which ▁they ▁assimilate d . ▁they ▁adopted ▁the ▁ gall o - rom ance ▁language ▁of ▁the ▁frank ish ▁land ▁they ▁settled , ▁their ▁dialect ▁becoming ▁known ▁as ▁nor man , ▁nor ma und ▁or ▁nor man ▁french , ▁an ▁important ▁literary ▁language . ▁the ▁du chy ▁of ▁nor man dy , ▁which ▁they ▁formed ▁by ▁treaty ▁with ▁the ▁french ▁crown , ▁was ▁a ▁great ▁ fi ef ▁of ▁medieval ▁ franc e , ▁and ▁under ▁rich ard ▁ i ▁of ▁nor man dy ▁was ▁forged ▁into ▁a ▁cohesive ▁and ▁formidable ▁principal ity ▁in ▁feudal ▁tenure . ▁the ▁nor man s ▁are ▁noted ▁both ▁for ▁their ▁culture , ▁such ▁as ▁their ▁unique ▁ ro man esque ▁architecture ▁and ▁musical ▁traditions , ▁and ▁for ▁their ▁significant ▁military ▁accomplishments ▁and ▁innovations . ▁nor man ▁adventure rs ▁founded ▁the ▁kingdom ▁of ▁ s ici ly ▁under ▁ ro ger ▁ ii ▁after ▁con quer ing ▁southern ▁it aly ▁on ▁the ▁ s ara cen s ▁and ▁by zan tine s , ▁and ▁an ▁expedition ▁on ▁behalf ▁of ▁their ▁duke , ▁ william ▁the ▁con quer or , ▁led ▁to ▁the ▁nor man ▁conquest ▁of ▁ eng land ▁at ▁the ▁battle ▁of ▁has ting s ▁in ▁10 66 . ▁nor man ▁cultural ▁and ▁military ▁influence ▁spread ▁from ▁these ▁new ▁ european ▁centres ▁to ▁the ▁crusade r ▁states ▁of ▁the ▁near ▁east , ▁where ▁their ▁prince ▁ bo he mond ▁ i ▁founded ▁the ▁principal ity ▁of ▁anti och ▁in ▁the ▁ le vant , ▁to ▁ s cot land ▁and ▁ wal es ▁in ▁great ▁ bri tain , ▁to ▁ ire land , ▁and ▁to ▁the ▁coast s ▁of ▁north ▁a fri ca ▁and ▁the ▁can ary ▁islands . 
[SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 14:0 15:1 16:1 17:2 18:3 19:4 20:5 21:6 22:6 23:7 24:8 25:9 26:10 27:11 28:12 29:13 30:13 31:14 32:15 33:16 34:17 35:18 36:18 37:19 38:20 39:20 40:20 41:21 42:22 43:23 44:24 45:25 46:26 47:27 48:28 49:29 50:30 51:31 52:32 53:32 54:32 55:33 56:34 57:34 58:34 59:35 60:36 61:37 62:37 63:37 64:38 65:38 66:38 67:38 68:39 69:40 70:41 71:42 72:42 73:42 74:43 75:44 76:45 77:46 78:46 79:46 80:46 81:46 82:46 83:47 84:48 85:49 86:50 87:50 88:51 89:52 90:53 91:53 92:54 93:55 94:56 95:57 96:58 97:59 98:59 99:59 100:60 101:60 102:60 103:61 104:62 105:62 106:63 107:63 108:64 109:65 110:66 111:67 112:67 113:68 114:69 115:69 116:70 117:71 118:71 119:71 120:71 121:72 122:73 123:74 124:75 125:76 126:77 127:78 128:79 129:80 130:80 131:81 132:82 133:83 134:84 135:84 136:84 137:85 138:86 139:87 140:87 141:87 142:87 143:88 144:89 145:90 146:90 147:91 148:91 149:92 150:93 151:93 152:93 153:94 154:95 155:96 156:97 157:98 158:99 159:100 160:101 161:101 162:102 163:103 164:104 165:104 166:105 167:106 168:106 169:106 170:107 171:108 172:109 173:110 174:111 175:112 176:112 177:113 178:114 179:115 180:116 181:117 182:117 183:117 184:117 185:118 186:119 187:120 188:121 189:121 190:122 191:123 192:124 193:125 194:126 195:127 196:128 197:129 198:129 199:130 200:130 201:131 202:131 203:132 204:133 205:134 206:135 207:136 208:136 209:136 210:136 211:137 212:138 213:138 214:138 215:139 216:139 217:140 218:141 219:141 220:141 221:142 222:143 223:143 224:144 225:145 226:146 227:146 228:146 229:146 230:146 231:147 232:148 233:148 234:148 235:148 236:148 237:149 238:150 239:151 240:152 241:153 242:154 243:155 244:156 245:156 246:157 247:157 248:158 249:159 250:159 251:159 252:159 253:160 254:161 255:162 256:163 257:163 258:164 259:165 260:166 261:166 262:166 263:167 264:168 265:169 266:170 267:171 268:171 269:171 270:172 271:173 272:173 273:173 274:174 275:174 276:175 277:176 278:177 279:178 280:179 281:180 282:181 283:182 284:183 285:183 286:184 287:185 288:186 289:187 290:187 291:188 292:189 293:190 294:191 295:192 296:192 297:193 298:194 299:195 300:196 301:196 302:196 303:196 304:197 305:197 306:198 307:199 308:200 309:200 310:201 311:202 312:202 313:203 314:204 315:205 316:205 317:205 318:205 319:206 320:207 321:207 322:207 323:207 324:208 325:209 326:209 327:209 328:210 329:211 330:212 331:212 332:212 333:212 334:213 335:214 336:214 337:214 338:214 339:215 340:216 341:217 342:218 343:218 344:219 345:220 346:221 347:221 348:221 349:222 350:223 351:224 352:224 353:225 354:225\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 
123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True 201:True 202:True 203:True 204:True 205:True 206:True 207:True 208:True 209:True 210:True 211:True 212:True 213:True 214:True 215:True 216:True 217:True 218:True 219:True 220:True 221:True 222:True 223:True 224:True 225:True 226:True 227:True 228:True 229:True 230:True 231:True 232:True 233:True 234:True 235:True 236:True 237:True 238:True 239:True 240:True 241:True 242:True 243:True 244:True 245:True 246:True 247:True 248:True 249:True 250:True 251:True 252:True 253:True 254:True 255:True 256:True 257:True 258:True 259:True 260:True 261:True 262:True 263:True 264:True 265:True 266:True 267:True 268:True 269:True 270:True 271:True 272:True 273:True 274:True 275:True 276:True 277:True 278:True 279:True 280:True 281:True 282:True 283:True 284:True 285:True 286:True 287:True 288:True 289:True 290:True 291:True 292:True 293:True 294:True 295:True 296:True 297:True 298:True 299:True 300:True 301:True 302:True 303:True 304:True 305:True 306:True 307:True 308:True 309:True 310:True 311:True 312:True 313:True 314:True 315:True 316:True 317:True 318:True 319:True 320:True 321:True 322:True 323:True 324:True 325:True 326:True 327:True 328:True 329:True 330:True 331:True 332:True 333:True 334:True 335:True 336:True 337:True 338:True 339:True 340:True 341:True 342:True 343:True 344:True 345:True 346:True 347:True 348:True 349:True 350:True 351:True 352:True 353:True 354:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 3824 769 190 17 31499 18 2147 10792 118 255 82 0 18 2387 249 12765 54 24 383 413 19 2518 21 370 1585 31 10146 2926 1590 21 176 18 479 1646 9 18 2387 249 23 55 17447 28 58 12656 3424 21 1707 28 58 31747 11703 982 19 1939 2002 16821 23 20 18 4777 7439 556 49 6684 66 11285 91 59 63 30218 66 9 63 3135 18 17 12353 155 13 14182 1789 1243 20 18 23675 1406 883 63 3602 19 58 13424 1939 318 34 2387 249 19 2387 661 5587 49 2387 249 29183 19 48 400 6957 1243 9 18 4626 8358 20 2387 249 2087 19 59 63 1851 37 4816 33 18 29183 6923 19 30 24 312 17 2265 4631 20 10146 17 12786 93 19 21 168 2628 1896 17 150 20 2387 249 2087 30 17163 91 24 30133 21 17868 3824 769 25 28893 8709 9 18 2387 249 23 41 1699 207 28 58 1799 19 148 34 58 1779 17 986 249 17660 4797 21 2985 8991 19 21 28 58 1376 370 17877 21 19273 9 2387 249 6693 1114 2118 18 5975 20 17 23 9620 111 168 17 986 2371 17 8343 99 2147 10792 56 1335 36 12229 31 18 17 23 3068 9593 23 21 37 10280 10115 23 19 21 48 8553 31 4399 20 58 25950 19 17 31499 18 2147 10792 218 19 687 22 18 2387 249 18093 20 17 5618 729 38 18 1727 20 51 1203 23 25 241 4126 9 2387 249 2518 21 370 2204 1912 40 166 109 17 30707 13348 22 18 21018 213 1035 20 18 479 1646 19 131 58 8434 17 1238 1438 7778 17 150 2118 18 3824 769 20 932 6892 25 18 17 529 11226 19 22 17 23 12982 729 21 17 9760 202 25 312 17 5365 3766 19 22 17 5294 729 19 21 22 18 2372 23 20 1012 24 5994 
1346 21 18 64 1449 5852 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000007\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 7\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁branch ▁of ▁theoretical ▁computer ▁science ▁deals ▁with ▁broadly ▁classify ing ▁computational ▁problems ▁by ▁difficulty ▁and ▁class ▁of ▁relationship ? [SEP] ▁computational ▁complexity ▁theory ▁is ▁a ▁branch ▁of ▁the ▁theory ▁of ▁computation ▁in ▁theoretical ▁computer ▁science ▁that ▁focuses ▁on ▁classify ing ▁computational ▁problems ▁according ▁to ▁their ▁inherent ▁difficulty , ▁and ▁relating ▁those ▁classes ▁to ▁each ▁other . ▁a ▁computational ▁problem ▁is ▁understood ▁to ▁be ▁a ▁task ▁that ▁is ▁in ▁principle ▁a men able ▁to ▁being ▁solved ▁by ▁a ▁computer , ▁which ▁is ▁equivalent ▁to ▁stating ▁that ▁the ▁problem ▁may ▁be ▁solved ▁by ▁mechanical ▁application ▁of ▁mathematical ▁steps , ▁such ▁as ▁an ▁algorithm . 
[SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 22:0 23:1 24:2 25:3 26:4 27:5 28:6 29:7 30:8 31:9 32:10 33:11 34:12 35:13 36:14 37:15 38:16 39:17 40:18 41:18 42:19 43:20 44:21 45:22 46:23 47:24 48:25 49:25 50:26 51:27 52:28 53:29 54:30 55:31 56:32 57:32 58:33 59:34 60:35 61:36 62:37 63:38 64:39 65:40 66:41 67:42 68:43 69:44 70:45 71:46 72:46 73:46 74:47 75:48 76:49 77:50 78:51 79:52 80:52 81:53 82:54 83:55 84:56 85:57 86:58 87:59 88:60 89:61 90:62 91:63 92:64 93:65 94:66 95:67 96:68 97:69 98:69 99:70 100:71 101:72 102:73 103:73\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 3709 20 13109 920 1767 4108 33 16026 27871 56 23228 708 37 6157 21 1075 20 1498 82 0 23228 11906 2818 27 24 3709 20 18 2818 20 27686 25 13109 920 1767 29 7712 31 27871 56 23228 708 549 22 58 16507 6157 19 21 7376 186 2814 22 231 86 9 24 23228 662 27 4950 22 39 24 2578 29 27 25 4926 24 1126 386 22 163 12567 37 24 920 19 59 27 4682 22 8033 29 18 662 132 39 12567 37 7820 1479 20 12956 2094 19 148 34 48 13301 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 22\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 24\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁computational ▁complexity ▁theory\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000008\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 8\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁is ▁a ▁manual ▁application ▁of ▁mathematical ▁steps ? [SEP] ▁computational ▁complexity ▁theory ▁is ▁a ▁branch ▁of ▁the ▁theory ▁of ▁computation ▁in ▁theoretical ▁computer ▁science ▁that ▁focuses ▁on ▁classify ing ▁computational ▁problems ▁according ▁to ▁their ▁inherent ▁difficulty , ▁and ▁relating ▁those ▁classes ▁to ▁each ▁other . ▁a ▁computational ▁problem ▁is ▁understood ▁to ▁be ▁a ▁task ▁that ▁is ▁in ▁principle ▁a men able ▁to ▁being ▁solved ▁by ▁a ▁computer , ▁which ▁is ▁equivalent ▁to ▁stating ▁that ▁the ▁problem ▁may ▁be ▁solved ▁by ▁mechanical ▁application ▁of ▁mathematical ▁steps , ▁such ▁as ▁an ▁algorithm . 
[SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:13 25:14 26:15 27:16 28:17 29:18 30:18 31:19 32:20 33:21 34:22 35:23 36:24 37:25 38:25 39:26 40:27 41:28 42:29 43:30 44:31 45:32 46:32 47:33 48:34 49:35 50:36 51:37 52:38 53:39 54:40 55:41 56:42 57:43 58:44 59:45 60:46 61:46 62:46 63:47 64:48 65:49 66:50 67:51 68:52 69:52 70:53 71:54 72:55 73:56 74:57 75:58 76:59 77:60 78:61 79:62 80:63 81:64 82:65 83:66 84:67 85:68 86:69 87:69 88:70 89:71 90:72 91:73 92:73\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 27 24 6403 1479 20 12956 2094 82 0 23228 11906 2818 27 24 3709 20 18 2818 20 27686 25 13109 920 1767 29 7712 31 27871 56 23228 708 549 22 58 16507 6157 19 21 7376 186 2814 22 231 86 9 24 23228 662 27 4950 22 39 24 2578 29 27 25 4926 24 1126 386 22 163 12567 37 24 920 19 59 27 4682 22 8033 29 18 662 132 39 12567 37 7820 1479 20 12956 2094 19 148 34 48 13301 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000009\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 9\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁measure ▁of ▁a ▁computational ▁problem ▁broadly ▁defines ▁the ▁inherent ▁difficulty ▁of ▁the ▁solution ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 17:0 18:1 19:2 20:3 21:4 22:5 23:6 24:7 25:8 26:9 27:10 28:11 29:12 30:12 31:13 32:14 33:15 34:16 35:16 36:17 37:18 38:19 39:19 40:20 41:21 42:21 43:22 44:23 45:24 46:25 47:26 48:27 49:28 50:29 51:30 52:31 53:32 54:33 55:33 56:34 57:35 58:36 59:37 60:38 61:39 62:40 63:41 64:41 65:42 66:43 67:44 68:45 69:46 70:46 71:47 72:48 73:49 74:50 75:51 76:52 77:52 78:53 79:54 80:55 81:56 82:57 83:58 84:59 85:59 86:59 87:60 88:61 89:62 90:62 91:62 92:63 93:64 94:65 95:66 96:67 97:68 98:69 99:70 100:70 101:70 102:71 103:72 104:73 105:73 106:74 107:75 108:76 109:77 110:78 111:79 112:79 113:79 114:80 115:81 116:82 117:82 118:82 119:83 120:84 121:85 122:86 123:87 124:88 125:89 126:90 127:91 128:92 129:93 130:94 131:95 132:96 133:97 134:98 135:99 136:100 137:101 138:102 139:103 140:103\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2310 20 24 23228 662 16026 14668 18 16507 6157 20 18 1938 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 24\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 29\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁if ▁its ▁solution ▁requires ▁significant ▁resources\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000010\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 10\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁method ▁is ▁used ▁to ▁in tu itive ly ▁assess ▁or ▁quantify ▁the ▁amount ▁of ▁resources ▁required ▁to ▁solve ▁a ▁computational ▁problem ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 25:0 26:1 27:2 28:3 29:4 30:5 31:6 32:7 33:8 34:9 35:10 36:11 37:12 38:12 39:13 40:14 41:15 42:16 43:16 44:17 45:18 46:19 47:19 48:20 49:21 50:21 51:22 52:23 53:24 54:25 55:26 56:27 57:28 58:29 59:30 60:31 61:32 62:33 63:33 64:34 65:35 66:36 67:37 68:38 69:39 70:40 71:41 72:41 73:42 74:43 75:44 76:45 77:46 78:46 79:47 80:48 81:49 82:50 83:51 84:52 85:52 86:53 87:54 88:55 89:56 90:57 91:58 92:59 93:59 94:59 95:60 96:61 97:62 98:62 99:62 100:63 101:64 102:65 103:66 104:67 105:68 106:69 107:70 108:70 109:70 110:71 111:72 112:73 113:73 114:74 115:75 116:76 117:77 118:78 119:79 120:79 121:79 122:80 123:81 124:82 125:82 126:82 127:83 128:84 129:85 130:86 131:87 132:88 133:89 134:90 135:91 136:92 137:93 138:94 139:95 140:96 141:97 142:98 143:99 144:100 145:101 146:102 147:103 148:103\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2175 27 179 22 25 2853 9736 111 7329 49 30299 18 1065 20 1485 978 22 4929 24 23228 662 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 53\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 56\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁mathematical ▁models ▁of ▁computation\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000011\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 11\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁are ▁two ▁basic ▁primary ▁resources ▁used ▁to ▁ gu age ▁complexity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:1 17:2 18:3 19:4 20:5 21:6 22:7 23:8 24:9 25:10 26:11 27:12 28:12 29:13 30:14 31:15 32:16 33:16 34:17 35:18 36:19 37:19 38:20 39:21 40:21 41:22 42:23 43:24 44:25 45:26 46:27 47:28 48:29 49:30 50:31 51:32 52:33 53:33 54:34 55:35 56:36 57:37 58:38 59:39 60:40 61:41 62:41 63:42 64:43 65:44 66:45 67:46 68:46 69:47 70:48 71:49 72:50 73:51 74:52 75:52 76:53 77:54 78:55 79:56 80:57 81:58 82:59 83:59 84:59 85:60 86:61 87:62 88:62 89:62 90:63 91:64 92:65 93:66 94:67 95:68 96:69 97:70 98:70 99:70 100:71 101:72 102:73 103:73 104:74 105:75 106:76 107:77 108:78 109:79 110:79 111:79 112:80 113:81 114:82 115:82 116:82 117:83 118:84 119:85 120:86 121:87 122:88 123:89 124:90 125:91 126:92 127:93 128:94 129:95 130:96 131:97 132:98 133:99 134:100 135:101 136:102 137:103 138:103\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 41 87 1949 1827 1485 179 22 17 3017 981 11906 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 65\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 67\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁time ▁and ▁storage\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000012\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 12\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁unit ▁is ▁measured ▁to ▁determine ▁circuit ▁simplicity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:12 25:13 26:14 27:15 28:16 29:16 30:17 31:18 32:19 33:19 34:20 35:21 36:21 37:22 38:23 39:24 40:25 41:26 42:27 43:28 44:29 45:30 46:31 47:32 48:33 49:33 50:34 51:35 52:36 53:37 54:38 55:39 56:40 57:41 58:41 59:42 60:43 61:44 62:45 63:46 64:46 65:47 66:48 67:49 68:50 69:51 70:52 71:52 72:53 73:54 74:55 75:56 76:57 77:58 78:59 79:59 80:59 81:60 82:61 83:62 84:62 85:62 86:63 87:64 88:65 89:66 90:67 91:68 92:69 93:70 94:70 95:70 96:71 97:72 98:73 99:73 100:74 101:75 102:76 103:77 104:78 105:79 106:79 107:79 108:80 109:81 110:82 111:82 112:82 113:83 114:84 115:85 116:86 117:87 118:88 119:89 120:90 121:91 122:92 123:93 124:94 125:95 126:96 127:97 128:98 129:99 130:100 131:101 132:102 133:103 134:103\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 1591 27 7375 22 2081 5034 18950 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000013\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 13\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁number ▁is ▁used ▁in ▁perpendicular ▁computing ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7 18:8 19:9 20:10 21:11 22:12 23:12 24:13 25:14 26:15 27:16 28:16 29:17 30:18 31:19 32:19 33:20 34:21 35:21 36:22 37:23 38:24 39:25 40:26 41:27 42:28 43:29 44:30 45:31 46:32 47:33 48:33 49:34 50:35 51:36 52:37 53:38 54:39 55:40 56:41 57:41 58:42 59:43 60:44 61:45 62:46 63:46 64:47 65:48 66:49 67:50 68:51 69:52 70:52 71:53 72:54 73:55 74:56 75:57 76:58 77:59 78:59 79:59 80:60 81:61 82:62 83:62 84:62 85:63 86:64 87:65 88:66 89:67 90:68 91:69 92:70 93:70 94:70 95:71 96:72 97:73 98:73 99:74 100:75 101:76 102:77 103:78 104:79 105:79 106:79 107:80 108:81 109:82 110:82 111:82 112:83 113:84 114:85 115:86 116:87 117:88 118:89 119:90 120:91 121:92 122:93 123:94 124:95 125:96 126:97 127:98 128:99 129:100 130:101 131:102 132:103 133:103\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 10:True 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 243 27 179 25 30525 9848 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Saving features into cached file cached_train_xlnet-base-cased_384\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - ***** Running training *****\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Num examples = 14\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Num Epochs = 3\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Instantaneous batch size per GPU = 8\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Gradient Accumulation steps = 1\n", + "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Total optimization steps = 6\n", "Epoch: 0%| | 0/3 [00:00\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mout_eval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal_prediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m''\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/content/cdQA/cdqa/reader/reader_sklearn.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 581\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 582\u001b[0;31m \u001b[0mout_eval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal_prediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprefix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 583\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 584\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mout_eval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal_prediction\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/content/cdQA/cdqa/reader/reader_sklearn.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(args, model, tokenizer, prefix)\u001b[0m\n\u001b[1;32m 364\u001b[0m \u001b[0moutput_nbest_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_null_log_odds_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_file\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart_n_top\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend_n_top\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 366\u001b[0;31m args.version_2_with_negative, tokenizer, args.verbose_logging)\n\u001b[0m\u001b[1;32m 367\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 368\u001b[0m write_predictions(examples, features, all_results, args.n_best_size,\n", - "\u001b[0;32m/content/cdQA/cdqa/reader/utils_squad.py\u001b[0m in \u001b[0;36mwrite_predictions_extended\u001b[0;34m(all_examples, all_features, all_results, n_best_size, max_answer_length, output_prediction_file, output_nbest_file, output_null_log_odds_file, orig_data_file, start_n_top, end_n_top, version_2_with_negative, tokenizer, verbose_logging)\u001b[0m\n\u001b[1;32m 892\u001b[0m 
\u001b[0mwriter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscores_diff_json\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"\\n\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 893\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 894\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0morig_data_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 895\u001b[0m \u001b[0morig_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"data\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 896\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mTypeError\u001b[0m: expected str, bytes or os.PathLike object, not list" - ] } ] }, @@ -1631,20 +1629,72 @@ "metadata": { "id": "J_72WSnDlAxn", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 734 + }, + "outputId": "59e5dbca-eb62-4ace-d6fd-a52a9ff8e2d2" }, "source": [ "!ls -la" ], - "execution_count": 0, - "outputs": [] + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "text": [ + "total 546040\n", + "drwxr-xr-x 10 root root 4096 Jul 18 09:00 .\n", + "drwxr-xr-x 1 root root 4096 Jul 18 08:52 ..\n", + "-rw-r--r-- 1 root root 2 Jul 18 08:59 added_tokens.json\n", + "-rw-r--r-- 1 root root 815 Jul 18 08:52 api.py\n", + "-rw-r--r-- 1 root root 22 Jul 18 08:52 apt.txt\n", + "-rw-r--r-- 1 root root 63330 Jul 18 09:00 cached_dev_xlnet-base-cased_384\n", + "-rw-r--r-- 1 root root 101136 Jul 18 08:53 cached_train_xlnet-base-cased_384\n", + "drwxr-xr-x 7 root root 4096 Jul 18 08:52 cdqa\n", + "drwxr-xr-x 2 root root 4096 Jul 18 08:52 cdqa.egg-info\n", + "-rw-r--r-- 1 root root 641 Jul 18 08:59 config.json\n", + "drwxr-xr-x 3 root root 4096 Jul 18 09:00 data\n", + "-rw-r--r-- 1 root root 4854279 Jul 18 08:52 dev-v1.1.json\n", + "-rw-r--r-- 1 root root 4370528 Jul 18 08:52 dev-v2.0.json\n", + "-rw-r--r-- 1 root root 8786 Jul 18 08:53 dev-v2.0-small.json\n", + "-rw-r--r-- 1 root root 1452 Jul 18 08:52 download.py\n", + "drwxr-xr-x 2 root root 4096 Jul 18 08:52 examples\n", + "drwxr-xr-x 8 root root 4096 Jul 18 08:52 .git\n", + "drwxr-xr-x 3 root root 4096 Jul 18 08:52 .github\n", + "-rw-r--r-- 1 root root 1375 Jul 18 08:52 .gitignore\n", + "-rw-r--r-- 1 root root 11356 Jul 18 08:52 LICENSE\n", + "-rw-r--r-- 1 root root 40 Jul 18 08:52 MANIFEST.in\n", + "-rw-r--r-- 1 root root 12357 Jul 18 09:01 nbest_predictions_.json\n", + "-rw-r--r-- 1 root root 723 Jul 18 09:01 null_odds_.json\n", + "-rw-r--r-- 1 root root 1349 Jul 18 09:01 predictions_.json\n", + "-rw-r--r-- 1 root root 476371987 Jul 18 08:59 pytorch_model.bin\n", + "-rw-r--r-- 1 root root 12855 Jul 18 08:52 README.md\n", + "-rw-r--r-- 1 root root 129 Jul 18 08:52 requirements.txt\n", + "drwxr-xr-x 3 root root 4096 Jul 18 08:53 runs\n", + 
"-rw-r--r-- 1 root root 727 Jul 18 08:52 setup.py\n", + "-rw-r--r-- 1 root root 202 Jul 18 08:59 special_tokens_map.json\n", + "-rw-r--r-- 1 root root 798011 Jul 18 08:59 spiece.model\n", + "drwxr-xr-x 2 root root 4096 Jul 18 08:52 tests\n", + "-rw-r--r-- 1 root root 1244 Jul 18 08:59 training_args.bin\n", + "-rw-r--r-- 1 root root 30288272 Jul 18 08:52 train-v1.1.json\n", + "-rw-r--r-- 1 root root 42123633 Jul 18 08:52 train-v2.0.json\n", + "-rw-r--r-- 1 root root 312 Jul 18 08:52 .travis.yml\n" + ], + "name": "stdout" + } + ] }, { "cell_type": "code", "metadata": { "id": "irjokX-mQvmY", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 95 + }, + "outputId": "fbc84da8-167e-4411-eb28-888fc56ee6f6" }, "source": [ "# print('query: {}'.format(query))\n", @@ -1652,34 +1702,116 @@ "print('title: {}'.format(final_prediction[1]))\n", "print('paragraph: {}'.format(final_prediction[2]))" ], - "execution_count": 0, - "outputs": [] + "execution_count": 22, + "outputs": [ + { + "output_type": "stream", + "text": [ + "answer: career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’\n", + "title: BNP Paribas Graduate Programs in France\n", + "paragraph: Looking to kickstart your career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’ Graduate Programs. These challenging 18-month programs spent in different operational functions enable participants to expand their banking knowledge, skills and professional networks. Learn more about this promising initiative.\n" + ], + "name": "stdout" + } + ] }, { "cell_type": "code", "metadata": { "id": "0GObRQ1rJs-K", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 734 + }, + "outputId": "e69849f3-f520-4116-b8bf-846705f4bb9f" }, "source": [ "!ls -la" ], - "execution_count": 0, - "outputs": [] + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "text": [ + "total 546040\n", + "drwxr-xr-x 10 root root 4096 Jul 18 09:00 .\n", + "drwxr-xr-x 1 root root 4096 Jul 18 08:52 ..\n", + "-rw-r--r-- 1 root root 2 Jul 18 08:59 added_tokens.json\n", + "-rw-r--r-- 1 root root 815 Jul 18 08:52 api.py\n", + "-rw-r--r-- 1 root root 22 Jul 18 08:52 apt.txt\n", + "-rw-r--r-- 1 root root 63330 Jul 18 09:00 cached_dev_xlnet-base-cased_384\n", + "-rw-r--r-- 1 root root 101136 Jul 18 08:53 cached_train_xlnet-base-cased_384\n", + "drwxr-xr-x 7 root root 4096 Jul 18 08:52 cdqa\n", + "drwxr-xr-x 2 root root 4096 Jul 18 08:52 cdqa.egg-info\n", + "-rw-r--r-- 1 root root 641 Jul 18 08:59 config.json\n", + "drwxr-xr-x 3 root root 4096 Jul 18 09:00 data\n", + "-rw-r--r-- 1 root root 4854279 Jul 18 08:52 dev-v1.1.json\n", + "-rw-r--r-- 1 root root 4370528 Jul 18 08:52 dev-v2.0.json\n", + "-rw-r--r-- 1 root root 8786 Jul 18 08:53 dev-v2.0-small.json\n", + "-rw-r--r-- 1 root root 1452 Jul 18 08:52 download.py\n", + "drwxr-xr-x 2 root root 4096 Jul 18 08:52 examples\n", + "drwxr-xr-x 8 root root 4096 Jul 18 08:52 .git\n", + "drwxr-xr-x 3 root root 4096 Jul 18 08:52 .github\n", + "-rw-r--r-- 1 root root 1375 Jul 18 08:52 .gitignore\n", + "-rw-r--r-- 1 root root 11356 Jul 18 08:52 LICENSE\n", + "-rw-r--r-- 1 root root 40 Jul 18 08:52 MANIFEST.in\n", + "-rw-r--r-- 1 root root 12357 Jul 18 09:01 nbest_predictions_.json\n", + "-rw-r--r-- 1 root root 723 Jul 18 09:01 null_odds_.json\n", + "-rw-r--r-- 1 root root 
1349 Jul 18 09:01 predictions_.json\n", + "-rw-r--r-- 1 root root 476371987 Jul 18 08:59 pytorch_model.bin\n", + "-rw-r--r-- 1 root root 12855 Jul 18 08:52 README.md\n", + "-rw-r--r-- 1 root root 129 Jul 18 08:52 requirements.txt\n", + "drwxr-xr-x 3 root root 4096 Jul 18 08:53 runs\n", + "-rw-r--r-- 1 root root 727 Jul 18 08:52 setup.py\n", + "-rw-r--r-- 1 root root 202 Jul 18 08:59 special_tokens_map.json\n", + "-rw-r--r-- 1 root root 798011 Jul 18 08:59 spiece.model\n", + "drwxr-xr-x 2 root root 4096 Jul 18 08:52 tests\n", + "-rw-r--r-- 1 root root 1244 Jul 18 08:59 training_args.bin\n", + "-rw-r--r-- 1 root root 30288272 Jul 18 08:52 train-v1.1.json\n", + "-rw-r--r-- 1 root root 42123633 Jul 18 08:52 train-v2.0.json\n", + "-rw-r--r-- 1 root root 312 Jul 18 08:52 .travis.yml\n" + ], + "name": "stdout" + } + ] }, { "cell_type": "code", "metadata": { "id": "ikxxSgPPLP9C", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 289 + }, + "outputId": "7da9448f-9eda-4a6b-8c9c-ce91a0274ae9" }, "source": [ "!cat predictions_.json" ], - "execution_count": 0, - "outputs": [] + "execution_count": 24, + "outputs": [ + { + "output_type": "stream", + "text": [ + "{\n", + " \"100e1c8e-69f7-4f4b-9f3c-936f33bcc71e\": \"business schools\",\n", + " \"cfac01a5-98eb-4d76-8066-adb63e24751c\": \"future talent, the BNP Paribas Group relies on its Campus Management team, which ensures that the\",\n", + " \"dbba50ec-f907-46ed-9f8a-e106b17585f5\": \"Jean-Dominique Criscuolo, \\\"BNP Paribas\",\n", + " \"4cbf8827-c3a9-42c2-9bd2-fd3ca38fc2e1\": \"Gilles Deschanel, in charge of academic partnerships at BNP Paribas\",\n", + " \"9b1e4395-6554-4fba-bd80-cd9639a687f4\": \"a faster rate. The program allows participants to discover the BNP Paribas\",\n", + " \"2e266853-ed16-4fce-9701-a5d5c7005b80\": \"career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas\\u2019\",\n", + " \"56d2240c-0d26-4534-bf99-87aecec38523\": \"their talent to maintain its edge in the market. As it works to build the future of\",\n", + " \"719ec8c6-fd2b-4ee5-b4b5-9c77b6f7e27c\": \"the entity. As soon as they are hired, participants also become members of a \\u201ctrainee\",\n", + " \"533f1412-6289-4a83-bb96-efbae2ba508c\": \", economics\",\n", + " \"062d4586-93bf-4ab3-b03f-f15aa89f39e8\": \"initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. 
Candidates need not have\",\n", + " \"60844039-fbfb-48ba-bd8a-824c0fc36935\": \"Let\\u2019s take a\"\n", + "}\n" + ], + "name": "stdout" + } + ] } ] } \ No newline at end of file From b4f1a1fc150b7a6b345e31d0a71df23a017cb82f Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Thu, 18 Jul 2019 11:28:28 +0200 Subject: [PATCH 31/43] add script to train xlnet reader on SQuAD 2.0 --- examples/tutorial-train-xlnet-squad.py | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 examples/tutorial-train-xlnet-squad.py diff --git a/examples/tutorial-train-xlnet-squad.py b/examples/tutorial-train-xlnet-squad.py new file mode 100644 index 00000000..d7cdf3f1 --- /dev/null +++ b/examples/tutorial-train-xlnet-squad.py @@ -0,0 +1,35 @@ +import wget +import os +import torch +from sklearn.externals import joblib +from cdqa.reader.reader_sklearn import Reader + +# download SQuAD 2.0 assets +squad_urls = [ + 'https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json', + 'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json' +] + +for squad_url in squad_urls: + wget.download(url=squad_url, out='.') + +# cast Reader class with train params +reader = Reader(train_file='train-v2.0.json', + predict_file='dev-v2.0.json', + model_type='xlnet', + model_name_or_path='xlnet-base-cased', + fp16=False, + output_dir='.') + +# train the model +reader.fit(X='') + +# save GPU version locally +joblib.dump(reader, os.path.join(reader.output_dir, 'xlnet_qa_vGPU.joblib')) + +# send current reader model to CPU +reader.model.to('cpu') +reader.device = torch.device('cpu') + +# save CPU it locally +joblib.dump(reader, os.path.join(reader.output_dir, 'bert_qa_vCPU.joblib')) \ No newline at end of file From bae914345241fb91ad323b8a8a0eafa7e7d743d6 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Thu, 18 Jul 2019 11:45:32 +0200 Subject: [PATCH 32/43] allow reader.fit(X='train-v2.0.json') --- cdqa/reader/reader_sklearn.py | 21 ++++++++------------- examples/tutorial-train-xlnet-squad.py | 11 ++++++----- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index b643d779..5c38b859 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -187,8 +187,8 @@ def train(args, train_dataset, model, tokenizer): return global_step, tr_loss / global_step -def evaluate(args, model, tokenizer, prefix=""): - dataset, examples, features = load_and_cache_examples(args, tokenizer, evaluate=True, output_examples=True) +def evaluate(input_file, args, model, tokenizer, prefix=""): + dataset, examples, features = load_and_cache_examples(input_file, args, tokenizer, evaluate=True, output_examples=True) if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: os.makedirs(args.output_dir) @@ -260,9 +260,8 @@ def evaluate(args, model, tokenizer, prefix=""): return results -def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False): +def load_and_cache_examples(input_file, args, tokenizer, evaluate=False, output_examples=False): # Load data features from cache or dataset file - input_file = args.predict_file if evaluate else args.train_file cached_features_file = os.path.join(os.path.dirname(input_file) if isinstance(input_file, str) else '', 'cached_{}_{}_{}'.format( 'dev' if evaluate else 'train', list(filter(None, args.model_name_or_path.split('/'))).pop(), @@ -307,8 +306,8 @@ def 
load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal return dataset -def predict(args, model, tokenizer, prefix=""): - dataset, examples, features = load_and_cache_examples(args, tokenizer, evaluate=True, output_examples=True) +def predict(input_file, args, model, tokenizer, prefix=""): + dataset, examples, features = load_and_cache_examples(input_file, args, tokenizer, evaluate=True, output_examples=True) if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: os.makedirs(args.output_dir) @@ -378,8 +377,6 @@ class Reader(BaseEstimator): """ def __init__(self, - train_file=None, - predict_file=None, model_type=None, model_name_or_path=None, output_dir=None, @@ -422,8 +419,6 @@ def __init__(self, server_port='', pretrained_model_path=None): - self.train_file = train_file - self.predict_file = predict_file self.model_type = model_type self.model_name_or_path = model_name_or_path self.output_dir = output_dir @@ -528,7 +523,7 @@ def fit(self, X, y=None): if os.path.exists(self.output_dir) and os.listdir(self.output_dir) and not self.overwrite_output_dir: raise ValueError("Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(self.output_dir)) - train_dataset = load_and_cache_examples(self, self.tokenizer, evaluate=False, output_examples=False) + train_dataset = load_and_cache_examples(input_file=X, args=self, tokenizer=self.tokenizer, evaluate=False, output_examples=False) global_step, tr_loss = train(self, train_dataset, self.model, self.tokenizer) logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) @@ -569,7 +564,7 @@ def evaluate(self, X): self.model.to(self.device) # Evaluate - result = evaluate(self, self.model, self.tokenizer, prefix=global_step) + result = evaluate(input_file=X, args=self, model=self.model, tokenizer=self.tokenizer, prefix=global_step) result = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in result.items()) results.update(result) @@ -580,6 +575,6 @@ def evaluate(self, X): def predict(self, X): - out_eval, final_prediction = predict(self, self.model, self.tokenizer, prefix="") + out_eval, final_prediction = predict(input_file=X, args=self, model=self.model, tokenizer=self.tokenizer, prefix="") return out_eval, final_prediction diff --git a/examples/tutorial-train-xlnet-squad.py b/examples/tutorial-train-xlnet-squad.py index d7cdf3f1..269faec0 100644 --- a/examples/tutorial-train-xlnet-squad.py +++ b/examples/tutorial-train-xlnet-squad.py @@ -14,15 +14,13 @@ wget.download(url=squad_url, out='.') # cast Reader class with train params -reader = Reader(train_file='train-v2.0.json', - predict_file='dev-v2.0.json', - model_type='xlnet', +reader = Reader(model_type='xlnet', model_name_or_path='xlnet-base-cased', fp16=False, output_dir='.') # train the model -reader.fit(X='') +reader.fit(X='train-v2.0.json') # save GPU version locally joblib.dump(reader, os.path.join(reader.output_dir, 'xlnet_qa_vGPU.joblib')) @@ -32,4 +30,7 @@ reader.device = torch.device('cpu') # save CPU it locally -joblib.dump(reader, os.path.join(reader.output_dir, 'bert_qa_vCPU.joblib')) \ No newline at end of file +joblib.dump(reader, os.path.join(reader.output_dir, 'bert_qa_vCPU.joblib')) + +# evaluate the model +reader.evaluate(X='dev-v2.0.json') \ No newline at end of file From 8e7bb0eac7861dea30de4284ec259dfba8dbf6f1 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Thu, 18 Jul 2019 12:09:30 +0200 Subject: [PATCH 33/43] 
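A minimal usage sketch of the API after the refactor above, assuming the file names from examples/tutorial-train-xlnet-squad.py and a CUDA runtime as in the notebook (reader.fit and reader.evaluate now take the SQuAD JSON path directly as X):

import os
from sklearn.externals import joblib
from cdqa.reader.reader_sklearn import Reader

# instantiate the reader with the same parameters as the tutorial script
reader = Reader(model_type='xlnet',
                model_name_or_path='xlnet-base-cased',
                fp16=False,
                output_dir='.')

# fine-tune on SQuAD 2.0: X is the dataset path itself after this patch
reader.fit(X='train-v2.0.json')

# run the official SQuAD 2.0 evaluation script on the dev set
reader.evaluate(X='dev-v2.0.json')

# persist the fitted reader and reload it later, as the tutorial script does
joblib.dump(reader, os.path.join(reader.output_dir, 'xlnet_qa_vGPU.joblib'))
reader = joblib.load(os.path.join('.', 'xlnet_qa_vGPU.joblib'))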
small fixes --- cdqa/reader/reader_sklearn.py | 6 +++--- examples/tutorial-train-xlnet-squad.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index 5c38b859..a8778401 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -243,7 +243,7 @@ def evaluate(input_file, args, model, tokenizer, prefix=""): # XLNet uses a more complex post-processing procedure write_predictions_extended(examples, features, all_results, args.n_best_size, args.max_answer_length, output_prediction_file, - output_nbest_file, output_null_log_odds_file, args.predict_file, + output_nbest_file, output_null_log_odds_file, input_file, model.config.start_n_top, model.config.end_n_top, args.version_2_with_negative, tokenizer, args.verbose_logging) else: @@ -253,7 +253,7 @@ def evaluate(input_file, args, model, tokenizer, prefix=""): args.version_2_with_negative, args.null_score_diff_threshold) # Evaluate with the official SQuAD script - evaluate_options = EVAL_OPTS(data_file=args.predict_file, + evaluate_options = EVAL_OPTS(data_file=input_file, pred_file=output_prediction_file, na_prob_file=output_null_log_odds_file) results = evaluate_on_squad(evaluate_options) @@ -361,7 +361,7 @@ def predict(input_file, args, model, tokenizer, prefix=""): # XLNet uses a more complex post-processing procedure out_eval, final_prediction = write_predictions_extended(examples, features, all_results, args.n_best_size, args.max_answer_length, output_prediction_file, - output_nbest_file, output_null_log_odds_file, args.predict_file, + output_nbest_file, output_null_log_odds_file, input_file, model.config.start_n_top, model.config.end_n_top, args.version_2_with_negative, tokenizer, args.verbose_logging) else: diff --git a/examples/tutorial-train-xlnet-squad.py b/examples/tutorial-train-xlnet-squad.py index 269faec0..c6af072e 100644 --- a/examples/tutorial-train-xlnet-squad.py +++ b/examples/tutorial-train-xlnet-squad.py @@ -30,7 +30,7 @@ reader.device = torch.device('cpu') # save CPU it locally -joblib.dump(reader, os.path.join(reader.output_dir, 'bert_qa_vCPU.joblib')) +joblib.dump(reader, os.path.join(reader.output_dir, 'xlnet_qa_vCPU.joblib')) # evaluate the model -reader.evaluate(X='dev-v2.0.json') \ No newline at end of file +out_eval, final_prediction = reader.evaluate(X='dev-v2.0.json') \ No newline at end of file From 9081abc77119916c601b631d8a8c96574c787b0b Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Thu, 18 Jul 2019 12:25:40 +0200 Subject: [PATCH 34/43] update notebook (workflow verified) --- examples/tutorial-train-xlnet-squad.ipynb | 2127 ++++++++------------- 1 file changed, 786 insertions(+), 1341 deletions(-) diff --git a/examples/tutorial-train-xlnet-squad.ipynb b/examples/tutorial-train-xlnet-squad.ipynb index c2b0142a..8a7b8844 100644 --- a/examples/tutorial-train-xlnet-squad.ipynb +++ b/examples/tutorial-train-xlnet-squad.ipynb @@ -31,7 +31,7 @@ "metadata": { "id": "zNtCqwveFjcK", "colab_type": "code", - "outputId": "51af972d-83a0-4187-c207-ada3bef5bebd", + "outputId": "c4404e96-13b7-44ee-c479-c848e02aa23d", "colab": { "base_uri": "https://localhost:8080/", "height": 153 @@ -46,12 +46,12 @@ "output_type": "stream", "text": [ "Cloning into 'cdQA'...\n", - "remote: Enumerating objects: 152, done.\u001b[K\n", - "remote: Counting objects: 100% (152/152), done.\u001b[K\n", - "remote: Compressing objects: 100% (103/103), done.\u001b[K\n", - "remote: Total 948 (delta 99), 
reused 96 (delta 49), pack-reused 796\u001b[K\n", - "Receiving objects: 100% (948/948), 351.74 KiB | 733.00 KiB/s, done.\n", - "Resolving deltas: 100% (566/566), done.\n" + "remote: Enumerating objects: 174, done.\u001b[K\n", + "remote: Counting objects: 100% (174/174), done.\u001b[K\n", + "remote: Compressing objects: 100% (121/121), done.\u001b[K\n", + "remote: Total 970 (delta 117), reused 104 (delta 53), pack-reused 796\u001b[K\n", + "Receiving objects: 100% (970/970), 365.53 KiB | 1.25 MiB/s, done.\n", + "Resolving deltas: 100% (584/584), done.\n" ], "name": "stdout" } @@ -77,7 +77,7 @@ "metadata": { "id": "5jBtSKczGF38", "colab_type": "code", - "outputId": "925c98ca-29f2-405d-a03e-cd45c3c77659", + "outputId": "c8e8ae59-c3f4-4d59-81b5-51c46aa316bd", "colab": { "base_uri": "https://localhost:8080/", "height": 56 @@ -103,82 +103,28 @@ "metadata": { "id": "DHl2HUX1GRd6", "colab_type": "code", - "outputId": "e3c0e779-b07f-47d4-e044-fc4115d3e2d9", + "outputId": "954dc6ec-8564-4453-e7e1-9b417941b1e2", "colab": { "base_uri": "https://localhost:8080/", - "height": 1000 + "height": 172 } }, "source": [ - "!pip install -e ." + "!pip install -q -e ." ], "execution_count": 4, "outputs": [ { "output_type": "stream", "text": [ - "Obtaining file:///content/cdQA\n", - "Requirement already satisfied: Flask in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (1.1.1)\n", - "Collecting flask_cors (from cdqa==1.0.3)\n", - " Downloading https://files.pythonhosted.org/packages/78/38/e68b11daa5d613e3a91e4bf3da76c94ac9ee0d9cd515af9c1ab80d36f709/Flask_Cors-3.0.8-py2.py3-none-any.whl\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.13.2)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.24.2)\n", - "Requirement already satisfied: prettytable in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.7.2)\n", - "Collecting pytorch_pretrained_bert (from cdqa==1.0.3)\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d7/e0/c08d5553b89973d9a240605b9c12404bcf8227590de62bae27acbcfe076b/pytorch_pretrained_bert-0.6.2-py3-none-any.whl (123kB)\n", - "\u001b[K |████████████████████████████████| 133kB 53.7MB/s \n", - "\u001b[?25hCollecting pytorch-transformers (from cdqa==1.0.3)\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/40/b5/2d78e74001af0152ee61d5ad4e290aec9a1e43925b21df2dc74ec100f1ab/pytorch_transformers-1.0.0-py3-none-any.whl (137kB)\n", - "\u001b[K |████████████████████████████████| 143kB 63.5MB/s \n", - "\u001b[?25hCollecting tensorboardX (from cdqa==1.0.3)\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/c3/12/dcaf67e1312475b26db9e45e7bb6f32b540671a9ee120b3a72d9e09bc517/tensorboardX-1.8-py2.py3-none-any.whl (216kB)\n", - "\u001b[K |████████████████████████████████| 225kB 59.3MB/s \n", - "\u001b[?25hRequirement already satisfied: scikit_learn in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (0.21.2)\n", - "Collecting tika (from cdqa==1.0.3)\n", - " Downloading https://files.pythonhosted.org/packages/10/75/b566e446ffcf292f10c8d84c15a3d91615fe3d7ca8072a17c949d4e84b66/tika-1.19.tar.gz\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from cdqa==1.0.3) (4.28.1)\n", - "Collecting wget (from cdqa==1.0.3)\n", - " Downloading https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip\n", - "Requirement already 
satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (1.1.0)\n", - "Requirement already satisfied: Werkzeug>=0.15 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (0.15.4)\n", - "Requirement already satisfied: click>=5.1 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (7.0)\n", - "Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from Flask->cdqa==1.0.3) (2.10.1)\n", - "Requirement already satisfied: Six in /usr/local/lib/python3.6/dist-packages (from flask_cors->cdqa==1.0.3) (1.12.0)\n", - "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (1.16.4)\n", - "Requirement already satisfied: python-dateutil>=2.5.0 in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (2.5.3)\n", - "Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.6/dist-packages (from pandas->cdqa==1.0.3) (2018.9)\n", - "Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.9.185)\n", - "Requirement already satisfied: torch>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (1.1.0)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from pytorch_pretrained_bert->cdqa==1.0.3) (2.21.0)\n", - "Collecting regex (from pytorch_pretrained_bert->cdqa==1.0.3)\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/6f/4e/1b178c38c9a1a184288f72065a65ca01f3154df43c6ad898624149b8b4e0/regex-2019.06.08.tar.gz (651kB)\n", - "\u001b[K |████████████████████████████████| 655kB 38.0MB/s \n", - "\u001b[?25hCollecting sentencepiece (from pytorch-transformers->cdqa==1.0.3)\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/00/95/7f357995d5eb1131aa2092096dca14a6fc1b1d2860bd99c22a612e1d1019/sentencepiece-0.1.82-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\n", - "\u001b[K |████████████████████████████████| 1.0MB 46.9MB/s \n", - "\u001b[?25hRequirement already satisfied: protobuf>=3.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorboardX->cdqa==1.0.3) (3.7.1)\n", - "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.6/dist-packages (from scikit_learn->cdqa==1.0.3) (1.3.0)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from tika->cdqa==1.0.3) (41.0.1)\n", - "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->Flask->cdqa==1.0.3) (1.1.1)\n", - "Requirement already satisfied: s3transfer<0.3.0,>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.2.1)\n", - "Requirement already satisfied: botocore<1.13.0,>=1.12.185 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (1.12.185)\n", - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.9.4)\n", - "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2.8)\n", - "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (3.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in 
/usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (2019.6.16)\n", - "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->pytorch_pretrained_bert->cdqa==1.0.3) (1.24.3)\n", - "Requirement already satisfied: docutils>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.13.0,>=1.12.185->boto3->pytorch_pretrained_bert->cdqa==1.0.3) (0.14)\n", - "Building wheels for collected packages: tika, wget, regex\n", - " Building wheel for tika (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Stored in directory: /root/.cache/pip/wheels/b4/db/8a/3a3f0c0725448eaa92703e3dda71e29dc13a119ff6c1036848\n", + "\u001b[K |████████████████████████████████| 133kB 10.3MB/s \n", + "\u001b[K |████████████████████████████████| 143kB 43.0MB/s \n", + "\u001b[K |████████████████████████████████| 225kB 43.8MB/s \n", + "\u001b[K |████████████████████████████████| 655kB 37.5MB/s \n", + "\u001b[K |████████████████████████████████| 1.0MB 33.1MB/s \n", + "\u001b[?25h Building wheel for tika (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for wget (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Stored in directory: /root/.cache/pip/wheels/40/15/30/7d8f7cea2902b4db79e3fea550d7d7b85ecb27ef992b618f3f\n", - " Building wheel for regex (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Stored in directory: /root/.cache/pip/wheels/35/e4/80/abf3b33ba89cf65cd262af8a22a5a999cc28fbfabea6b38473\n", - "Successfully built tika wget regex\n", - "Installing collected packages: flask-cors, regex, pytorch-pretrained-bert, sentencepiece, pytorch-transformers, tensorboardX, tika, wget, cdqa\n", - " Running setup.py develop for cdqa\n", - "Successfully installed cdqa flask-cors-3.0.8 pytorch-pretrained-bert-0.6.2 pytorch-transformers-1.0.0 regex-2019.6.8 sentencepiece-0.1.82 tensorboardX-1.8 tika-1.19 wget-3.2\n" + " Building wheel for regex (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" ], "name": "stdout" } @@ -210,45 +156,33 @@ { "cell_type": "code", "metadata": { - "id": "50r4anYBITRO", + "id": "ylorIsqLz_J3", "colab_type": "code", - "outputId": "5560fd38-1679-464e-87a0-c4c71c6b1828", + "outputId": "c9341e63-e0d2-415e-9466-c93984c3705b", "colab": { "base_uri": "https://localhost:8080/", - "height": 462 + "height": 250 } }, "source": [ - "!ls -la" + "!wget https://raw.githubusercontent.com/huggingface/pytorch-transformers/master/examples/tests_samples/SQUAD/dev-v2.0-small.json" ], "execution_count": 6, "outputs": [ { "output_type": "stream", "text": [ - "total 79828\n", - "drwxr-xr-x 8 root root 4096 Jul 18 08:52 .\n", - "drwxr-xr-x 1 root root 4096 Jul 18 08:52 ..\n", - "-rw-r--r-- 1 root root 815 Jul 18 08:52 api.py\n", - "-rw-r--r-- 1 root root 22 Jul 18 08:52 apt.txt\n", - "drwxr-xr-x 6 root root 4096 Jul 18 08:52 cdqa\n", - "drwxr-xr-x 2 root root 4096 Jul 18 08:52 cdqa.egg-info\n", - "-rw-r--r-- 1 root root 4854279 Jul 18 08:52 dev-v1.1.json\n", - "-rw-r--r-- 1 root root 4370528 Jul 18 08:52 dev-v2.0.json\n", - "-rw-r--r-- 1 root root 1452 Jul 18 08:52 download.py\n", - "drwxr-xr-x 2 root root 4096 Jul 18 08:52 examples\n", - "drwxr-xr-x 8 root root 4096 Jul 18 08:52 .git\n", - "drwxr-xr-x 3 root root 4096 Jul 18 08:52 .github\n", - "-rw-r--r-- 1 root root 1375 Jul 18 08:52 .gitignore\n", - "-rw-r--r-- 1 root root 11356 Jul 18 08:52 LICENSE\n", - "-rw-r--r-- 1 root root 40 Jul 18 08:52 MANIFEST.in\n", - "-rw-r--r-- 1 root root 12855 Jul 18 08:52 README.md\n", - "-rw-r--r-- 1 root root 129 Jul 18 08:52 requirements.txt\n", - "-rw-r--r-- 1 root root 727 Jul 18 08:52 setup.py\n", - "drwxr-xr-x 2 root root 4096 Jul 18 08:52 tests\n", - "-rw-r--r-- 1 root root 30288272 Jul 18 08:52 train-v1.1.json\n", - "-rw-r--r-- 1 root root 42123633 Jul 18 08:52 train-v2.0.json\n", - "-rw-r--r-- 1 root root 312 Jul 18 08:52 .travis.yml\n" + "--2019-07-18 10:12:11-- https://raw.githubusercontent.com/huggingface/pytorch-transformers/master/examples/tests_samples/SQUAD/dev-v2.0-small.json\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 8786 (8.6K) [text/plain]\n", + "Saving to: ‘dev-v2.0-small.json’\n", + "\n", + "\rdev-v2.0-small.json 0%[ ] 0 --.-KB/s \rdev-v2.0-small.json 100%[===================>] 8.58K --.-KB/s in 0s \n", + "\n", + "2019-07-18 10:12:11 (95.5 MB/s) - ‘dev-v2.0-small.json’ saved [8786/8786]\n", + "\n" ], "name": "stdout" } @@ -263,7 +197,7 @@ }, "id": "umJkmO9HFf3L", "colab_type": "code", - "outputId": "7baca070-08ae-49f4-c464-50bc3c40b741", + "outputId": "20e10dc5-a760-471e-99b7-f93127162702", "colab": { "base_uri": "https://localhost:8080/", "height": 76 @@ -290,234 +224,37 @@ { "cell_type": "code", "metadata": { - "id": "ylorIsqLz_J3", + "id": "wJYzc9-ie_bc", "colab_type": "code", - "outputId": "0d0cd98f-f44c-4e3d-eaf1-caef8aeff290", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 230 - } - }, - "source": [ - "!wget https://raw.githubusercontent.com/huggingface/pytorch-transformers/master/examples/tests_samples/SQUAD/dev-v2.0-small.json" - ], - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "text": [ - "--2019-07-18 08:53:01-- https://raw.githubusercontent.com/huggingface/pytorch-transformers/master/examples/tests_samples/SQUAD/dev-v2.0-small.json\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 8786 (8.6K) [text/plain]\n", - "Saving to: ‘dev-v2.0-small.json’\n", - "\n", - "\rdev-v2.0-small.json 0%[ ] 0 --.-KB/s \rdev-v2.0-small.json 100%[===================>] 8.58K --.-KB/s in 0s \n", - "\n", - "2019-07-18 08:53:01 (92.6 MB/s) - ‘dev-v2.0-small.json’ saved [8786/8786]\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "sMRDXXFdaO7z", - "colab_type": "code", - "outputId": "e921c11f-70ce-4cfb-9519-131692dc787e", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 - } - }, - "source": [ - "!cat dev-v2.0-small.json" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "text": [ - "{\n", - " \"version\": \"v2.0\",\n", - " \"data\": [{\n", - " \"title\": \"Normans\",\n", - " \"paragraphs\": [{\n", - " \"qas\": [{\n", - " \"question\": \"In what country is Normandy located?\",\n", - " \"id\": \"56ddde6b9a695914005b9628\",\n", - " \"answers\": [{\n", - " \"text\": \"France\",\n", - " \"answer_start\": 159\n", - " }],\n", - " \"is_impossible\": false\n", - " }, {\n", - " \"question\": \"When were the Normans in Normandy?\",\n", - " \"id\": \"56ddde6b9a695914005b9629\",\n", - " \"answers\": [{\n", - " \"text\": \"10th and 11th centuries\",\n", - " \"answer_start\": 94\n", - " }],\n", - " \"is_impossible\": false\n", - " }, {\n", - " \"question\": \"From which countries did the Norse originate?\",\n", - " \"id\": \"56ddde6b9a695914005b962a\",\n", - " \"answers\": [{\n", - " \"text\": \"Denmark, Iceland and Norway\",\n", - " \"answer_start\": 256\n", - " }],\n", - " \"is_impossible\": false\n", - " }, {\n", - " \"plausible_answers\": [{\n", - " \"text\": \"Rollo\",\n", - " \"answer_start\": 308\n", - " }],\n", - " \"question\": \"Who did King Charles III swear fealty to?\",\n", - " \"id\": \"5ad39d53604f3c001a3fe8d3\",\n", - " \"answers\": [],\n", - " \"is_impossible\": true\n", - " }, {\n", - " \"plausible_answers\": [{\n", - " \"text\": \"10th century\",\n", - " \"answer_start\": 671\n", - " }],\n", - " 
\"question\": \"When did the Frankish identity emerge?\",\n", - " \"id\": \"5ad39d53604f3c001a3fe8d4\",\n", - " \"answers\": [],\n", - " \"is_impossible\": true\n", - " }],\n", - " \"context\": \"The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse (\\\"Norman\\\" comes from \\\"Norseman\\\") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.\"\n", - " }, {\n", - " \"qas\": [{\n", - " \"question\": \"Who was the duke in the battle of Hastings?\",\n", - " \"id\": \"56dddf4066d3e219004dad5f\",\n", - " \"answers\": [{\n", - " \"text\": \"William the Conqueror\",\n", - " \"answer_start\": 1022\n", - " }],\n", - " \"is_impossible\": false\n", - " }, {\n", - " \"plausible_answers\": [{\n", - " \"text\": \"Antioch\",\n", - " \"answer_start\": 1295\n", - " }],\n", - " \"question\": \"What principality did William the conquerer found?\",\n", - " \"id\": \"5ad3a266604f3c001a3fea2b\",\n", - " \"answers\": [],\n", - " \"is_impossible\": true\n", - " }],\n", - " \"context\": \"The Norman dynasty had a major political, cultural and military impact on medieval Europe and even the Near East. The Normans were famed for their martial spirit and eventually for their Christian piety, becoming exponents of the Catholic orthodoxy into which they assimilated. They adopted the Gallo-Romance language of the Frankish land they settled, their dialect becoming known as Norman, Normaund or Norman French, an important literary language. The Duchy of Normandy, which they formed by treaty with the French crown, was a great fief of medieval France, and under Richard I of Normandy was forged into a cohesive and formidable principality in feudal tenure. The Normans are noted both for their culture, such as their unique Romanesque architecture and musical traditions, and for their significant military accomplishments and innovations. Norman adventurers founded the Kingdom of Sicily under Roger II after conquering southern Italy on the Saracens and Byzantines, and an expedition on behalf of their duke, William the Conqueror, led to the Norman conquest of England at the Battle of Hastings in 1066. 
Norman cultural and military influence spread from these new European centres to the Crusader states of the Near East, where their prince Bohemond I founded the Principality of Antioch in the Levant, to Scotland and Wales in Great Britain, to Ireland, and to the coasts of north Africa and the Canary Islands.\"\n", - " }]\n", - " }, {\n", - " \"title\": \"Computational_complexity_theory\",\n", - " \"paragraphs\": [{\n", - " \"qas\": [{\n", - " \"question\": \"What branch of theoretical computer science deals with broadly classifying computational problems by difficulty and class of relationship?\",\n", - " \"id\": \"56e16182e3433e1400422e28\",\n", - " \"answers\": [{\n", - " \"text\": \"Computational complexity theory\",\n", - " \"answer_start\": 0\n", - " }],\n", - " \"is_impossible\": false\n", - " }, {\n", - " \"plausible_answers\": [{\n", - " \"text\": \"algorithm\",\n", - " \"answer_start\": 472\n", - " }],\n", - " \"question\": \"What is a manual application of mathematical steps?\",\n", - " \"id\": \"5ad5316b5b96ef001a10ab76\",\n", - " \"answers\": [],\n", - " \"is_impossible\": true\n", - " }],\n", - " \"context\": \"Computational complexity theory is a branch of the theory of computation in theoretical computer science that focuses on classifying computational problems according to their inherent difficulty, and relating those classes to each other. A computational problem is understood to be a task that is in principle amenable to being solved by a computer, which is equivalent to stating that the problem may be solved by mechanical application of mathematical steps, such as an algorithm.\"\n", - " }, {\n", - " \"qas\": [{\n", - " \"question\": \"What measure of a computational problem broadly defines the inherent difficulty of the solution?\",\n", - " \"id\": \"56e16839cd28a01900c67887\",\n", - " \"answers\": [{\n", - " \"text\": \"if its solution requires significant resources\",\n", - " \"answer_start\": 46\n", - " }],\n", - " \"is_impossible\": false\n", - " }, {\n", - " \"question\": \"What method is used to intuitively assess or quantify the amount of resources required to solve a computational problem?\",\n", - " \"id\": \"56e16839cd28a01900c67888\",\n", - " \"answers\": [{\n", - " \"text\": \"mathematical models of computation\",\n", - " \"answer_start\": 176\n", - " }],\n", - " \"is_impossible\": false\n", - " }, {\n", - " \"question\": \"What are two basic primary resources used to guage complexity?\",\n", - " \"id\": \"56e16839cd28a01900c67889\",\n", - " \"answers\": [{\n", - " \"text\": \"time and storage\",\n", - " \"answer_start\": 305\n", - " }],\n", - " \"is_impossible\": false\n", - " }, {\n", - " \"plausible_answers\": [{\n", - " \"text\": \"the number of gates in a circuit\",\n", - " \"answer_start\": 436\n", - " }],\n", - " \"question\": \"What unit is measured to determine circuit simplicity?\",\n", - " \"id\": \"5ad532575b96ef001a10ab7f\",\n", - " \"answers\": [],\n", - " \"is_impossible\": true\n", - " }, {\n", - " \"plausible_answers\": [{\n", - " \"text\": \"the number of processors\",\n", - " \"answer_start\": 502\n", - " }],\n", - " \"question\": \"What number is used in perpendicular computing?\",\n", - " \"id\": \"5ad532575b96ef001a10ab80\",\n", - " \"answers\": [],\n", - " \"is_impossible\": true\n", - " }],\n", - " \"context\": \"A problem is regarded as inherently difficult if its solution requires significant resources, whatever the algorithm used. 
The theory formalizes this intuition, by introducing mathematical models of computation to study these problems and quantifying the amount of resources needed to solve them, such as time and storage. Other complexity measures are also used, such as the amount of communication (used in communication complexity), the number of gates in a circuit (used in circuit complexity) and the number of processors (used in parallel computing). One of the roles of computational complexity theory is to determine the practical limits on what computers can and cannot do.\"\n",
- " }]\n",
- " }]\n",
- "}"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "owyoli60qGb9",
- "colab_type": "code",
- "outputId": "ef6f4e1c-a4d6-42c8-ef4b-970e633389ec",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- }
+ },
+ "outputId": "27602640-c67d-4032-b39a-bb791ca93d93"
 },
 "source": [
- "reader = Reader(train_file='dev-v2.0-small.json',\n",
- " predict_file='dev-v2.0-small.json',\n",
- " model_type='xlnet',\n",
+ "# instantiate the Reader class with train params\n",
+ "reader = Reader(model_type='xlnet',\n",
 " model_name_or_path='xlnet-base-cased',\n",
 " fp16=False,\n",
- " output_dir='.')"
+ " output_dir='.')\n",
+ "\n",
+ "# train the model\n",
+ "reader.fit(X='dev-v2.0-small.json')"
 ],
- "execution_count": 10,
+ "execution_count": 8,
 "outputs": [
 {
 "output_type": "stream",
 "text": [
- "07/18/2019 08:53:10 - WARNING - cdqa.reader.reader_sklearn - Process rank: -1, device: cpu, n_gpu: 1, distributed training: False, 16-bits training: False\n",
- "07/18/2019 08:53:10 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json not found in cache, downloading to /tmp/tmpo_7tn6k6\n",
- "100%|██████████| 641/641 [00:00<00:00, 372581.61B/s]\n",
- "07/18/2019 08:53:11 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpo_7tn6k6 to cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n",
- "07/18/2019 08:53:11 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n",
- "07/18/2019 08:53:11 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmpo_7tn6k6\n",
- "07/18/2019 08:53:11 - INFO - pytorch_transformers.modeling_utils - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json from cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n",
- "07/18/2019 08:53:11 - INFO - pytorch_transformers.modeling_utils - Model config {\n",
+ "07/18/2019 10:12:16 - WARNING - cdqa.reader.reader_sklearn - Process rank: -1, device: cpu, n_gpu: 1, distributed training: False, 16-bits training: False\n",
+ "07/18/2019 10:12:17 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json not found in cache, downloading to /tmp/tmpbdkk4x32\n",
+ "100%|██████████| 641/641 [00:00<00:00, 209486.43B/s]\n",
+ "07/18/2019 10:12:17 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpbdkk4x32 to cache at 
/root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/18/2019 10:12:17 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/18/2019 10:12:17 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmpbdkk4x32\n", + "07/18/2019 10:12:17 - INFO - pytorch_transformers.modeling_utils - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json from cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/18/2019 10:12:17 - INFO - pytorch_transformers.modeling_utils - Model config {\n", " \"attn_type\": \"bi\",\n", " \"bi_data\": false,\n", " \"clamp_len\": -1,\n", @@ -548,21 +285,21 @@ " \"untie_r\": true\n", "}\n", "\n", - "07/18/2019 08:53:12 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model not found in cache, downloading to /tmp/tmpj2n7ud_j\n", - "100%|██████████| 798011/798011 [00:00<00:00, 892469.28B/s]\n", - "07/18/2019 08:53:13 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpj2n7ud_j to cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", - "07/18/2019 08:53:13 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", - "07/18/2019 08:53:13 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmpj2n7ud_j\n", - "07/18/2019 08:53:13 - INFO - pytorch_transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", - "07/18/2019 08:53:14 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin not found in cache, downloading to /tmp/tmplb24yv4w\n", - "100%|██████████| 467042463/467042463 [00:36<00:00, 12855738.60B/s]\n", - "07/18/2019 08:53:51 - INFO - pytorch_transformers.file_utils - copying /tmp/tmplb24yv4w to cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", - "07/18/2019 08:53:53 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", - "07/18/2019 08:53:53 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmplb24yv4w\n", - "07/18/2019 08:53:53 - INFO - pytorch_transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin from cache at 
/root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", - "07/18/2019 08:53:58 - INFO - pytorch_transformers.modeling_utils - Weights of XLNetForQuestionAnswering not initialized from pretrained model: ['start_logits.dense.weight', 'start_logits.dense.bias', 'end_logits.dense_0.weight', 'end_logits.dense_0.bias', 'end_logits.LayerNorm.weight', 'end_logits.LayerNorm.bias', 'end_logits.dense_1.weight', 'end_logits.dense_1.bias', 'answer_class.dense_0.weight', 'answer_class.dense_0.bias', 'answer_class.dense_1.weight']\n", - "07/18/2019 08:53:58 - INFO - pytorch_transformers.modeling_utils - Weights from pretrained model not used in XLNetForQuestionAnswering: ['lm_loss.weight', 'lm_loss.bias']\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Training/evaluation parameters Reader(adam_epsilon=1e-08, cache_dir='', config_name='', do_eval=None,\n", + "07/18/2019 10:12:18 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model not found in cache, downloading to /tmp/tmpf9gdwwk5\n", + "100%|██████████| 798011/798011 [00:00<00:00, 1625264.57B/s]\n", + "07/18/2019 10:12:19 - INFO - pytorch_transformers.file_utils - copying /tmp/tmpf9gdwwk5 to cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/18/2019 10:12:19 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/18/2019 10:12:19 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmpf9gdwwk5\n", + "07/18/2019 10:12:19 - INFO - pytorch_transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/18/2019 10:12:19 - INFO - pytorch_transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin not found in cache, downloading to /tmp/tmph6h8z6zo\n", + "100%|██████████| 467042463/467042463 [00:18<00:00, 24948823.11B/s]\n", + "07/18/2019 10:12:38 - INFO - pytorch_transformers.file_utils - copying /tmp/tmph6h8z6zo to cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", + "07/18/2019 10:12:40 - INFO - pytorch_transformers.file_utils - creating metadata file for /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", + "07/18/2019 10:12:40 - INFO - pytorch_transformers.file_utils - removing temp file /tmp/tmph6h8z6zo\n", + "07/18/2019 10:12:40 - INFO - pytorch_transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin from cache at 
/root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", + "07/18/2019 10:12:45 - INFO - pytorch_transformers.modeling_utils - Weights of XLNetForQuestionAnswering not initialized from pretrained model: ['start_logits.dense.weight', 'start_logits.dense.bias', 'end_logits.dense_0.weight', 'end_logits.dense_0.bias', 'end_logits.LayerNorm.weight', 'end_logits.LayerNorm.bias', 'end_logits.dense_1.weight', 'end_logits.dense_1.bias', 'answer_class.dense_0.weight', 'answer_class.dense_0.bias', 'answer_class.dense_1.weight']\n", + "07/18/2019 10:12:45 - INFO - pytorch_transformers.modeling_utils - Weights from pretrained model not used in XLNetForQuestionAnswering: ['lm_loss.weight', 'lm_loss.bias']\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.reader_sklearn - Training/evaluation parameters Reader(adam_epsilon=1e-08, cache_dir='', config_name='', do_eval=None,\n", " do_lower_case=True, do_train=None, doc_stride=128,\n", " eval_all_checkpoints=True, evaluate_during_training=True, fp16=False,\n", " fp16_opt_level='O1', gradient_accumulation_steps=1, learning_rate=5e-05,\n", @@ -571,225 +308,201 @@ " model_name_or_path='xlnet-base-cased', model_type='xlnet',\n", " n_best_size=20, no_cuda=True, null_score_diff_threshold=0.0,\n", " num_train_epochs=3.0, output_dir='.', overwrite_cache=True,\n", - " overwrite_output_dir=True, per_gpu_eval_batch_size=8, ...)\n" - ], - "name": "stderr" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "24eT2nuKtrqp", - "colab_type": "code", - "outputId": "bf9c82fa-677f-4772-f5c6-2d6f22b5f39a", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - } - }, - "source": [ - "reader.fit(X='')" - ], - "execution_count": 11, - "outputs": [ - { - "output_type": "stream", - "text": [ - "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at dev-v2.0-small.json\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁in ▁what ▁country ▁is ▁nor man dy ▁located ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . 
[SEP]\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:1 14:1 15:2 16:2 17:2 18:2 19:2 20:3 21:3 22:3 23:3 24:3 25:3 26:4 27:4 28:5 29:5 30:5 31:5 32:6 33:6 34:6 35:7 36:7 37:7 38:8 39:9 40:10 41:11 42:12 43:13 44:14 45:14 46:15 47:16 48:16 49:17 50:18 51:19 52:20 53:21 54:22 55:22 56:22 57:22 58:23 59:24 60:25 61:26 62:26 63:26 64:26 65:27 66:28 67:29 68:30 69:31 70:31 71:32 72:32 73:32 74:32 75:32 76:32 77:33 78:34 79:35 80:35 81:35 82:35 83:35 84:35 85:35 86:36 87:36 88:37 89:38 90:39 91:40 92:40 93:40 94:40 95:41 96:41 97:42 98:43 99:43 100:44 101:44 102:45 103:46 104:47 105:48 106:48 107:48 108:49 109:50 110:51 111:52 112:52 113:52 114:53 115:54 116:55 117:55 118:55 119:56 120:56 121:57 122:58 123:59 124:59 125:59 126:59 127:60 128:61 129:62 130:63 131:64 132:65 133:66 134:67 135:68 136:69 137:69 138:70 139:71 140:71 141:71 142:71 143:71 144:71 145:72 146:72 147:73 148:74 149:75 150:76 151:77 152:78 153:79 154:80 155:80 156:80 157:80 158:80 159:81 160:82 161:83 162:84 163:84 164:84 165:84 166:85 167:86 168:87 169:88 170:89 171:90 172:91 173:92 174:93 175:93 176:93 177:94 178:95 179:96 180:97 181:98 182:99 183:100 184:101 185:102 186:102 187:103 188:103 189:104 190:105 191:106 192:107 193:108 194:109 195:110 196:111 197:112 198:112\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 25 113 234 27 2387 249 2087 798 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 
135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 61\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 63\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁ franc e\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000001\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 1\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁were ▁the ▁nor man s ▁in ▁nor man dy ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . 
▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 13:0 14:1 15:1 16:1 17:2 18:2 19:2 20:2 21:2 22:3 23:3 24:3 25:3 26:3 27:3 28:4 29:4 30:5 31:5 32:5 33:5 34:6 35:6 36:6 37:7 38:7 39:7 40:8 41:9 42:10 43:11 44:12 45:13 46:14 47:14 48:15 49:16 50:16 51:17 52:18 53:19 54:20 55:21 56:22 57:22 58:22 59:22 60:23 61:24 62:25 63:26 64:26 65:26 66:26 67:27 68:28 69:29 70:30 71:31 72:31 73:32 74:32 75:32 76:32 77:32 78:32 79:33 80:34 81:35 82:35 83:35 84:35 85:35 86:35 87:35 88:36 89:36 90:37 91:38 92:39 93:40 94:40 95:40 96:40 97:41 98:41 99:42 100:43 101:43 102:44 103:44 104:45 105:46 106:47 107:48 108:48 109:48 110:49 111:50 112:51 113:52 114:52 115:52 116:53 117:54 118:55 119:55 120:55 121:56 122:56 123:57 124:58 125:59 126:59 127:59 128:59 129:60 130:61 131:62 132:63 133:64 134:65 135:66 136:67 137:68 138:69 139:69 140:70 141:71 142:71 143:71 144:71 145:71 146:71 147:72 148:72 149:73 150:74 151:75 152:76 153:77 154:78 155:79 156:80 157:80 158:80 159:80 160:80 161:81 162:82 163:83 164:84 165:84 166:84 167:84 168:85 169:86 170:87 171:88 172:89 173:90 174:91 175:92 176:93 177:93 178:93 179:94 180:95 181:96 182:97 183:98 184:99 185:100 186:101 187:102 188:102 189:103 190:103 191:104 192:105 193:106 194:107 195:108 196:109 197:110 198:111 199:112 200:112\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 90 55 18 2387 249 23 25 2387 249 2087 82 0 18 
2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 46\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 51\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁10 th ▁and ▁11 th ▁centuries\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000002\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 2\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁from ▁which ▁countries ▁did ▁the ▁nor se ▁originate ? 
[SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:1 14:1 15:2 16:2 17:2 18:2 19:2 20:3 21:3 22:3 23:3 24:3 25:3 26:4 27:4 28:5 29:5 30:5 31:5 32:6 33:6 34:6 35:7 36:7 37:7 38:8 39:9 40:10 41:11 42:12 43:13 44:14 45:14 46:15 47:16 48:16 49:17 50:18 51:19 52:20 53:21 54:22 55:22 56:22 57:22 58:23 59:24 60:25 61:26 62:26 63:26 64:26 65:27 66:28 67:29 68:30 69:31 70:31 71:32 72:32 73:32 74:32 75:32 76:32 77:33 78:34 79:35 80:35 81:35 82:35 83:35 84:35 85:35 86:36 87:36 88:37 89:38 90:39 91:40 92:40 93:40 94:40 95:41 96:41 97:42 98:43 99:43 100:44 101:44 102:45 103:46 104:47 105:48 106:48 107:48 108:49 109:50 110:51 111:52 112:52 113:52 114:53 115:54 116:55 117:55 118:55 119:56 120:56 121:57 122:58 123:59 124:59 125:59 126:59 127:60 128:61 129:62 130:63 131:64 132:65 133:66 134:67 135:68 136:69 137:69 138:70 139:71 140:71 141:71 142:71 143:71 144:71 145:72 146:72 147:73 148:74 149:75 150:76 151:77 152:78 153:79 154:80 155:80 156:80 157:80 158:80 159:81 160:82 161:83 162:84 163:84 164:84 165:84 166:85 167:86 168:87 169:88 170:89 171:90 172:91 173:92 174:93 175:93 176:93 177:94 178:95 179:96 180:97 181:98 182:99 183:100 184:101 185:102 186:102 187:103 188:103 189:104 190:105 191:106 192:107 193:108 194:109 195:110 196:111 197:112 198:112\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 
157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 40 59 452 190 18 2387 1022 19788 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 91\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 99\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁ den mark , ▁ice land ▁and ▁nor 
way\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000003\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 3\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁did ▁king ▁ char les ▁ iii ▁swear ▁fe al ty ▁to ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 16:0 17:1 18:1 19:1 20:2 21:2 22:2 23:2 24:2 25:3 26:3 27:3 28:3 29:3 30:3 31:4 32:4 33:5 34:5 35:5 36:5 37:6 38:6 39:6 40:7 41:7 42:7 43:8 44:9 45:10 46:11 47:12 48:13 49:14 50:14 51:15 52:16 53:16 54:17 55:18 56:19 57:20 58:21 59:22 60:22 61:22 62:22 63:23 64:24 65:25 66:26 67:26 68:26 69:26 70:27 71:28 72:29 73:30 74:31 75:31 76:32 77:32 78:32 79:32 80:32 81:32 82:33 83:34 84:35 85:35 86:35 87:35 88:35 89:35 90:35 91:36 92:36 93:37 94:38 95:39 96:40 97:40 98:40 99:40 100:41 101:41 102:42 103:43 104:43 105:44 106:44 107:45 108:46 109:47 110:48 111:48 112:48 113:49 114:50 115:51 116:52 117:52 118:52 119:53 120:54 121:55 122:55 123:55 124:56 125:56 126:57 127:58 128:59 129:59 130:59 131:59 132:60 133:61 134:62 135:63 136:64 137:65 138:66 139:67 140:68 141:69 142:69 143:70 144:71 145:71 146:71 147:71 148:71 149:71 150:72 151:72 152:73 153:74 154:75 155:76 156:77 157:78 158:79 159:80 160:80 161:80 162:80 163:80 164:81 165:82 166:83 167:84 168:84 169:84 170:84 171:85 172:86 173:87 174:88 175:89 176:90 177:91 178:92 179:93 180:93 181:93 182:94 183:95 184:96 185:97 186:98 187:99 188:100 189:101 190:102 191:102 192:103 193:103 194:104 195:105 196:106 197:107 198:108 199:109 200:110 201:111 202:112 203:112\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 
110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True 162:True 163:True 164:True 165:True 166:True 167:True 168:True 169:True 170:True 171:True 172:True 173:True 174:True 175:True 176:True 177:True 178:True 179:True 180:True 181:True 182:True 183:True 184:True 185:True 186:True 187:True 188:True 189:True 190:True 191:True 192:True 193:True 194:True 195:True 196:True 197:True 198:True 199:True 200:True 201:True 202:True 203:True\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 190 3351 17 6628 1890 17 28488 13650 9151 212 982 22 82 0 18 2387 249 23 17 10 8670 249 60 116 1067 249 66 23 97 29183 60 17355 443 23 97 17 19737 60 17355 19102 11 55 18 104 61 25 18 241 138 21 506 138 5007 675 58 304 22 2387 249 2087 19 24 653 25 17 12786 93 9 63 55 15016 40 2387 1022 17 10 12 8670 249 12 909 40 17 12 8670 23 11153 12 11 5984 270 21 15512 40 17 1426 5022 19 2528 729 21 2387 1550 61 19 168 58 691 4419 155 19 1178 22 13650 9151 212 982 22 3351 17 6628 1890 17 28488 20 1750 17 12786 780 9 135 7821 20 31712 21 13230 33 18 2630 23675 1406 21 17 986 249 13 16975 17459 8743 19 58 14564 74 6430 14377 33 18 398 16404 18976 13 716 10086 20 1750 17 12786 780 9 18 6627 2518 21 2663 3643 20 18 2387 249 23 4871 3097 25 18 89 455 20 18 241 138 997 19 21 36 952 22 16331 95 18 20384 5007 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000004\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 4\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁did ▁the ▁frank ish ▁identity ▁emerge ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . ▁the ▁distinct ▁cultural ▁and ▁ethnic ▁identity ▁of ▁the ▁nor man s ▁emerged ▁initially ▁in ▁the ▁first ▁half ▁of ▁the ▁10 th ▁century , ▁and ▁it ▁continued ▁to ▁evolve ▁over ▁the ▁succeeding ▁centuries . [SEP]\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 10:0 11:1 12:1 13:1 14:2 15:2 16:2 17:2 18:2 19:3 20:3 21:3 22:3 23:3 24:3 25:4 26:4 27:5 28:5 29:5 30:5 31:6 32:6 33:6 34:7 35:7 36:7 37:8 38:9 39:10 40:11 41:12 42:13 43:14 44:14 45:15 46:16 47:16 48:17 49:18 50:19 51:20 52:21 53:22 54:22 55:22 56:22 57:23 58:24 59:25 60:26 61:26 62:26 63:26 64:27 65:28 66:29 67:30 68:31 69:31 70:32 71:32 72:32 73:32 74:32 75:32 76:33 77:34 78:35 79:35 80:35 81:35 82:35 83:35 84:35 85:36 86:36 87:37 88:38 89:39 90:40 91:40 92:40 93:40 94:41 95:41 96:42 97:43 98:43 99:44 100:44 101:45 102:46 103:47 104:48 105:48 106:48 107:49 108:50 109:51 110:52 111:52 112:52 113:53 114:54 115:55 116:55 117:55 118:56 119:56 120:57 121:58 122:59 123:59 124:59 125:59 126:60 127:61 128:62 129:63 130:64 131:65 132:66 133:67 134:68 135:69 136:69 137:70 138:71 139:71 140:71 141:71 142:71 143:71 144:72 145:72 146:73 147:74 148:75 149:76 150:77 151:78 152:79 153:80 154:80 155:80 156:80 157:80 158:81 159:82 160:83 161:84 162:84 163:84 164:84 165:85 166:86 167:87 168:88 169:89 170:90 171:91 172:92 173:93 174:93 175:93 176:94 177:95 178:96 179:97 180:98 181:99 182:100 183:101 184:102 185:102 186:103 187:103 188:104 189:105 190:106 191:107 192:108 193:109 194:110 195:111 196:112 197:112\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 10:True 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 
[... remaining token_is_max_context / input_ids / input_mask / segment_ids dumps elided ...]
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000005\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 5\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁was ▁the ▁duke ▁in ▁the ▁battle ▁of ▁has ting s ? [SEP] ▁the ▁nor man ▁dynasty [... context tokens elided ...] [SEP]\n",
[... token_to_orig_map / token_is_max_context / input_ids / input_mask / segment_ids dumps elided ...]
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 246\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 251\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁ william ▁the ▁con quer or\n",
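The elided dumps all share one layout, visible in the raw logs: input_ids covers [CLS] + question + [SEP] + context + [SEP] and is zero-padded out to max_seq_length (384 by default); input_mask is 1 over real tokens and 0 over padding; segment_ids is 0 over the [CLS] + question + [SEP] block and 1 over the context + [SEP] block. A minimal sketch of that padding step, with illustrative special-token ids (the real ids come from the pretrained tokenizer used by convert_examples_to_features):

def encode_question_context(question_ids, context_ids, cls_id, sep_id,
                            max_seq_length=384):
    """Sketch of the input layout logged above; all ids are illustrative."""
    input_ids = [cls_id] + question_ids + [sep_id] + context_ids + [sep_id]
    # segment 0 spans [CLS] + question + [SEP]; segment 1 spans context + [SEP]
    segment_ids = [0] * (len(question_ids) + 2) + [1] * (len(context_ids) + 1)
    input_mask = [1] * len(input_ids)  # 1 = real token, 0 = padding
    pad = max_seq_length - len(input_ids)
    input_ids += [0] * pad
    input_mask += [0] * pad
    segment_ids += [0] * pad
    return input_ids, input_mask, segment_ids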
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000006\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 6\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁principal ity ▁did ▁ william ▁the ▁con quer er ▁found ? [SEP] ▁the ▁nor man ▁dynasty [... context tokens elided (same paragraph as example 5) ...] [SEP]\n",
[... token_to_orig_map / token_is_max_context / input_ids / input_mask / segment_ids dumps elided ...]
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000007\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 7\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁branch ▁of ▁theoretical ▁computer ▁science ▁deals ▁with ▁broadly ▁classify ing ▁computational ▁problems ▁by ▁difficulty ▁and ▁class ▁of ▁relationship ? [SEP] ▁computational ▁complexity ▁theory [... context tokens elided ...] [SEP]\n",
[... token_to_orig_map / token_is_max_context / input_ids / input_mask / segment_ids dumps elided ...]
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 22\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 24\n",
- "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁computational ▁complexity ▁theory\n",
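start_position and end_position index into the feature's token list: for unique_id 1000000007, tokens[22:25] is ▁computational ▁complexity ▁theory, and token_to_orig_map (22:0 23:1 24:2 in the elided dump) sends that span back to whitespace tokens 0 through 2 of the original paragraph. A sketch of that recovery step, following the logic write_predictions applies to predicted spans (the variable names here are ours):

def recover_answer(tokens, token_to_orig_map, doc_tokens, start, end):
    """Map a (start, end) token span back to text, using the fields logged above."""
    # Model-side answer: the SentencePiece pieces from start to end inclusive.
    tok_text = " ".join(tokens[start:end + 1])
    # Original-text answer: map both ends back to whitespace-token indices.
    orig_start = token_to_orig_map[start]
    orig_end = token_to_orig_map[end]
    orig_text = " ".join(doc_tokens[orig_start:orig_end + 1])
    return tok_text, orig_text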
[SEP]\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:13 25:14 26:15 27:16 28:17 29:18 30:18 31:19 32:20 33:21 34:22 35:23 36:24 37:25 38:25 39:26 40:27 41:28 42:29 43:30 44:31 45:32 46:32 47:33 48:34 49:35 50:36 51:37 52:38 53:39 54:40 55:41 56:42 57:43 58:44 59:45 60:46 61:46 62:46 63:47 64:48 65:49 66:50 67:51 68:52 69:52 70:53 71:54 72:55 73:56 74:57 75:58 76:59 77:60 78:61 79:62 80:63 81:64 82:65 83:66 84:67 85:68 86:69 87:69 88:70 89:71 90:72 91:73 92:73\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 27 24 6403 1479 20 12956 2094 82 0 23228 11906 2818 27 24 3709 20 18 2818 20 27686 25 13109 920 1767 29 7712 31 27871 56 23228 708 549 22 58 16507 6157 19 21 7376 186 2814 22 231 86 9 24 23228 662 27 4950 22 39 24 2578 29 27 25 4926 24 1126 386 22 163 12567 37 24 920 19 59 27 4682 22 8033 29 18 662 132 39 12567 37 7820 1479 20 12956 2094 19 148 34 48 13301 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000009\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 9\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁measure ▁of ▁a ▁computational ▁problem ▁broadly ▁defines ▁the ▁inherent ▁difficulty ▁of ▁the ▁solution ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 17:0 18:1 19:2 20:3 21:4 22:5 23:6 24:7 25:8 26:9 27:10 28:11 29:12 30:12 31:13 32:14 33:15 34:16 35:16 36:17 37:18 38:19 39:19 40:20 41:21 42:21 43:22 44:23 45:24 46:25 47:26 48:27 49:28 50:29 51:30 52:31 53:32 54:33 55:33 56:34 57:35 58:36 59:37 60:38 61:39 62:40 63:41 64:41 65:42 66:43 67:44 68:45 69:46 70:46 71:47 72:48 73:49 74:50 75:51 76:52 77:52 78:53 79:54 80:55 81:56 82:57 83:58 84:59 85:59 86:59 87:60 88:61 89:62 90:62 91:62 92:63 93:64 94:65 95:66 96:67 97:68 98:69 99:70 100:70 101:70 102:71 103:72 104:73 105:73 106:74 107:75 108:76 109:77 110:78 111:79 112:79 113:79 114:80 115:81 116:82 117:82 118:82 119:83 120:84 121:85 122:86 123:87 124:88 125:89 126:90 127:91 128:92 129:93 130:94 131:95 132:96 133:97 134:98 135:99 136:100 137:101 138:102 139:103 140:103\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2310 20 24 23228 662 16026 14668 18 16507 6157 20 18 1938 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 24\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 29\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁if ▁its ▁solution ▁requires ▁significant ▁resources\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000010\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 10\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁method ▁is ▁used ▁to ▁in tu itive ly ▁assess ▁or ▁quantify ▁the ▁amount ▁of ▁resources ▁required ▁to ▁solve ▁a ▁computational ▁problem ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
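token_is_max_context is True at every context position in these dumps because each paragraph fits inside a single doc span (doc_span_index is always 0). When a document is longer than max_seq_length, convert_examples_to_features slides a window forward by doc_stride tokens, and a token that lands in several overlapping spans is scored only in the span where it has the most context on both sides. The rule is essentially the _check_is_max_context helper in utils_squad:

import collections

DocSpan = collections.namedtuple("DocSpan", ["start", "length"])

def check_is_max_context(doc_spans, cur_span_index, position):
    """Is this span the one where `position` has maximal surrounding context?"""
    best_score, best_span_index = None, None
    for span_index, doc_span in enumerate(doc_spans):
        end = doc_span.start + doc_span.length - 1
        if position < doc_span.start or position > end:
            continue  # token not in this span at all
        num_left_context = position - doc_span.start
        num_right_context = end - position
        # Favor balanced context; the 0.01 term breaks ties toward longer spans.
        score = min(num_left_context, num_right_context) + 0.01 * doc_span.length
        if best_score is None or score > best_score:
            best_score, best_span_index = score, span_index
    return cur_span_index == best_span_index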
[SEP]\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 25:0 26:1 27:2 28:3 29:4 30:5 31:6 32:7 33:8 34:9 35:10 36:11 37:12 38:12 39:13 40:14 41:15 42:16 43:16 44:17 45:18 46:19 47:19 48:20 49:21 50:21 51:22 52:23 53:24 54:25 55:26 56:27 57:28 58:29 59:30 60:31 61:32 62:33 63:33 64:34 65:35 66:36 67:37 68:38 69:39 70:40 71:41 72:41 73:42 74:43 75:44 76:45 77:46 78:46 79:47 80:48 81:49 82:50 83:51 84:52 85:52 86:53 87:54 88:55 89:56 90:57 91:58 92:59 93:59 94:59 95:60 96:61 97:62 98:62 99:62 100:63 101:64 102:65 103:66 104:67 105:68 106:69 107:70 108:70 109:70 110:71 111:72 112:73 113:73 114:74 115:75 116:76 117:77 118:78 119:79 120:79 121:79 122:80 123:81 124:82 125:82 126:82 127:83 128:84 129:85 130:86 131:87 132:88 133:89 134:90 135:91 136:92 137:93 138:94 139:95 140:96 141:97 142:98 143:99 144:100 145:101 146:102 147:103 148:103\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2175 27 179 22 25 2853 9736 111 7329 49 30299 18 1065 20 1485 978 22 4929 24 23228 662 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 53\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 56\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁mathematical ▁models ▁of ▁computation\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000011\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 11\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁are ▁two ▁basic ▁primary ▁resources ▁used ▁to ▁ gu age ▁complexity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:1 17:2 18:3 19:4 20:5 21:6 22:7 23:8 24:9 25:10 26:11 27:12 28:12 29:13 30:14 31:15 32:16 33:16 34:17 35:18 36:19 37:19 38:20 39:21 40:21 41:22 42:23 43:24 44:25 45:26 46:27 47:28 48:29 49:30 50:31 51:32 52:33 53:33 54:34 55:35 56:36 57:37 58:38 59:39 60:40 61:41 62:41 63:42 64:43 65:44 66:45 67:46 68:46 69:47 70:48 71:49 72:50 73:51 74:52 75:52 76:53 77:54 78:55 79:56 80:57 81:58 82:59 83:59 84:59 85:60 86:61 87:62 88:62 89:62 90:63 91:64 92:65 93:66 94:67 95:68 96:69 97:70 98:70 99:70 100:71 101:72 102:73 103:73 104:74 105:75 106:76 107:77 108:78 109:79 110:79 111:79 112:80 113:81 114:82 115:82 116:82 117:83 118:84 119:85 120:86 121:87 122:88 123:89 124:90 125:91 126:92 127:93 128:94 129:95 130:96 131:97 132:98 133:99 134:100 135:101 136:102 137:103 138:103\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 41 87 1949 1827 1485 179 22 17 3017 981 11906 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - start_position: 65\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - end_position: 67\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - answer: ▁time ▁and ▁storage\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000012\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 12\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁unit ▁is ▁measured ▁to ▁determine ▁circuit ▁simplicity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
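Once every example has been converted, these per-feature lists are stacked into fixed-shape tensors and zipped into one dataset for the DataLoader. A condensed sketch of what load_and_cache_examples in run_squad.py does for training (evaluation builds the dataset with an example index in place of the label positions):

import torch
from torch.utils.data import TensorDataset

def build_train_dataset(features):
    """Stack per-feature lists into (num_features, max_seq_length) LongTensors."""
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
    # Per-feature answer span labels (token indices into each feature).
    all_start_positions = torch.tensor([f.start_position for f in features], dtype=torch.long)
    all_end_positions = torch.tensor([f.end_position for f in features], dtype=torch.long)
    return TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
                         all_start_positions, all_end_positions)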
[SEP]\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:12 25:13 26:14 27:15 28:16 29:16 30:17 31:18 32:19 33:19 34:20 35:21 36:21 37:22 38:23 39:24 40:25 41:26 42:27 43:28 44:29 45:30 46:31 47:32 48:33 49:33 50:34 51:35 52:36 53:37 54:38 55:39 56:40 57:41 58:41 59:42 60:43 61:44 62:45 63:46 64:46 65:47 66:48 67:49 68:50 69:51 70:52 71:52 72:53 73:54 74:55 75:56 76:57 77:58 78:59 79:59 80:59 81:60 82:61 83:62 84:62 85:62 86:63 87:64 88:65 89:66 90:67 91:68 92:69 93:70 94:70 95:70 96:71 97:72 98:73 99:73 100:74 101:75 102:76 103:77 104:78 105:79 106:79 107:79 108:80 109:81 110:82 111:82 112:82 113:83 114:84 115:85 116:86 117:87 118:88 119:89 120:90 121:91 122:92 123:93 124:94 125:95 126:96 127:97 128:98 129:99 130:100 131:101 132:102 133:103 134:103\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 1591 27 7375 22 2081 5034 18950 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - unique_id: 1000000013\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - example_index: 13\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁number ▁is ▁used ▁in ▁perpendicular ▁computing ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7 18:8 19:9 20:10 21:11 22:12 23:12 24:13 25:14 26:15 27:16 28:16 29:17 30:18 31:19 32:19 33:20 34:21 35:21 36:22 37:23 38:24 39:25 40:26 41:27 42:28 43:29 44:30 45:31 46:32 47:33 48:33 49:34 50:35 51:36 52:37 53:38 54:39 55:40 56:41 57:41 58:42 59:43 60:44 61:45 62:46 63:46 64:47 65:48 66:49 67:50 68:51 69:52 70:52 71:53 72:54 73:55 74:56 75:57 76:58 77:59 78:59 79:59 80:60 81:61 82:62 83:62 84:62 85:63 86:64 87:65 88:66 89:67 90:68 91:69 92:70 93:70 94:70 95:71 96:72 97:73 98:73 99:74 100:75 101:76 102:77 103:78 104:79 105:79 106:79 107:80 108:81 109:82 110:82 111:82 112:83 113:84 114:85 115:86 116:87 117:88 118:89 119:90 120:91 121:92 122:93 123:94 124:95 125:96 126:97 127:98 128:99 129:100 130:101 131:102 132:103 133:103\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - token_is_max_context: 10:True 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 243 27 179 25 30525 9848 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.utils_squad - impossible example\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Saving features into cached file cached_train_xlnet-base-cased_384\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - ***** Running training *****\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Num examples = 14\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Num Epochs = 3\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Instantaneous batch size per GPU = 8\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Total train batch size (w. parallel, distributed & accumulation) = 8\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Gradient Accumulation steps = 1\n", - "07/18/2019 08:53:58 - INFO - cdqa.reader.reader_sklearn - Total optimization steps = 6\n", + " overwrite_output_dir=True, per_gpu_eval_batch_size=8, ...)\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at dev-v2.0-small.json\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - example_index: 0\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁in ▁what ▁country ▁is ▁nor man dy ▁located ? [SEP] ▁the ▁nor man s ▁ ( nor man : ▁no ur man d s ; ▁french : ▁norm and s ; ▁ latin : ▁norm anni ) ▁were ▁the ▁people ▁who ▁in ▁the ▁10 th ▁and ▁11 th ▁centuries ▁gave ▁their ▁name ▁to ▁nor man dy , ▁a ▁region ▁in ▁ franc e . ▁they ▁were ▁descended ▁from ▁nor se ▁ ( \" nor man \" ▁comes ▁from ▁ \" nor s eman \" ) ▁raid ers ▁and ▁pirates ▁from ▁ den mark , ▁ice land ▁and ▁nor way ▁who , ▁under ▁their ▁leader ▁roll o , ▁agreed ▁to ▁swear ▁fe al ty ▁to ▁king ▁ char les ▁ iii ▁of ▁west ▁ franc ia . ▁through ▁generations ▁of ▁assimilation ▁and ▁mixing ▁with ▁the ▁native ▁frank ish ▁and ▁ ro man - gau lish ▁populations , ▁their ▁descendants ▁would ▁gradually ▁merge ▁with ▁the ▁car olin gian - based ▁cultures ▁of ▁west ▁ franc ia . 
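The step count in the removed training log above follows directly from the dataset size and batching; a minimal check of the arithmetic, assuming the single-GPU settings the log itself reports (no distributed training, gradient accumulation of 1):

    import math

    # Values taken from the training log above.
    num_examples = 14
    per_gpu_train_batch_size = 8
    num_train_epochs = 3
    gradient_accumulation_steps = 1

    # One optimizer step per batch when gradient_accumulation_steps == 1.
    batches_per_epoch = math.ceil(num_examples / per_gpu_train_batch_size)        # 2
    t_total = batches_per_epoch // gradient_accumulation_steps * num_train_epochs
    print(t_total)  # 6 -- matches "Total optimization steps = 6"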
+ " overwrite_output_dir=True, per_gpu_eval_batch_size=8, ...)\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at dev-v2.0-small.json\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - example_index: 0\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁in ▁what ▁country ▁is ▁nor man dy ▁located ? [SEP] ▁the ▁nor man s ... ▁the ▁succeeding ▁centuries . [SEP]\n",
+ "[... token_to_orig_map, token_is_max_context, input_ids, input_mask and segment_ids elided ...]\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - start_position: 61\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - end_position: 63\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - answer: ▁ franc e\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - unique_id: 1000000001\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - example_index: 1\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁were ▁the ▁nor man s ▁in ▁nor man dy ? [SEP] ▁the ▁nor man s ... ▁the ▁succeeding ▁centuries . [SEP]\n",
+ "[... feature arrays elided ...]\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - start_position: 46\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - end_position: 51\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - answer: ▁10 th ▁and ▁11 th ▁centuries\n",
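The start_position / end_position fields above index into the logged tokens list, and joining the SentencePiece pieces in that range reproduces the logged answer. An illustrative check for the first example (unique_id 1000000000), with the tokens list abbreviated by hand to the neighbourhood of the span:

    # Hypothetical snippet: `tokens` is the logged tokens list for unique_id
    # 1000000000, shown here only around the answer span (positions 58..64).
    tokens = ['▁a', '▁region', '▁in', '▁', 'franc', 'e', '.']
    offset = 58
    start_position, end_position = 61, 63  # from the log above

    pieces = tokens[start_position - offset : end_position - offset + 1]
    print(pieces)                                     # ['▁', 'franc', 'e'] -- logged as "answer: ▁ franc e"
    print(''.join(pieces).replace('▁', ' ').strip())  # 'france'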
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - unique_id: 1000000002\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - example_index: 2\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁from ▁which ▁countries ▁did ▁the ▁nor se ▁originate ? [SEP] ▁the ▁nor man s ... ▁the ▁succeeding ▁centuries . [SEP]\n",
+ "[... feature arrays elided ...]\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - start_position: 91\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - end_position: 99\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - answer: ▁ den mark , ▁ice land ▁and ▁nor way\n",
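Every per-example record in this log is emitted while building InputFeatures. A minimal sketch of the featurization step, assuming the signatures of read_squad_examples and convert_examples_to_features in this revision of utils_squad.py; max_seq_length=384 matches the cached-features filename above, while doc_stride=128 and max_query_length=64 are the run_squad defaults and are assumptions here:

    from pytorch_transformers import XLNetTokenizer
    from utils_squad import read_squad_examples, convert_examples_to_features

    tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')

    # dev-v2.0-small.json is the file named in the "Creating features" line above.
    examples = read_squad_examples(input_file='dev-v2.0-small.json',
                                   is_training=True,
                                   version_2_with_negative=True)
    features = convert_examples_to_features(examples=examples,
                                            tokenizer=tokenizer,
                                            max_seq_length=384,  # as in cached_train_xlnet-base-cased_384
                                            doc_stride=128,
                                            max_query_length=64,
                                            is_training=True)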
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - unique_id: 1000000003\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - example_index: 3\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁did ▁king ▁ char les ▁ iii ▁swear ▁fe al ty ▁to ? [SEP] ▁the ▁nor man s ... ▁the ▁succeeding ▁centuries . [SEP]\n",
+ "[... feature arrays elided ...]\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - impossible example\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - unique_id: 1000000004\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - example_index: 4\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁when ▁did ▁the ▁frank ish ▁identity ▁emerge ? [SEP] ▁the ▁nor man s ... ▁the ▁succeeding ▁centuries . [SEP]\n",
+ "[... feature arrays elided ...]\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - impossible example\n",
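For spans logged as "impossible example" (SQuAD 2.0 negatives), no answer offsets are printed because the target span is collapsed onto the [CLS] token (index 0 in the examples above). A sketch of that convention, assuming the behaviour of the featurization in this revision; answer_span is a hypothetical helper written only to illustrate it:

    def answer_span(start_position, end_position, cls_index, span_is_impossible):
        """Target-span convention (sketch): unanswerable spans point at [CLS]."""
        if span_is_impossible:
            return cls_index, cls_index  # both ends collapse onto [CLS]
        return start_position, end_position

    print(answer_span(61, 63, 0, False))    # (61, 63) -- answerable, as in example 0
    print(answer_span(None, None, 0, True)) # (0, 0)   -- an "impossible example"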
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - unique_id: 1000000005\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - example_index: 5\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁was ▁the ▁duke ▁in ▁the ▁battle ▁of ▁has ting s ? [SEP] ▁the ▁nor man ▁dynasty ... ▁the ▁can ary ▁islands . [SEP]\n",
+ "[... feature arrays elided ...]\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - start_position: 246\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - end_position: 251\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - answer: ▁ william ▁the ▁con quer or\n",
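Every token above reports token_is_max_context: True because each paragraph fits in a single span (doc_span_index: 0 throughout). With longer contexts, doc_stride produces overlapping spans and each token is scored in every span that contains it; a sketch of the usual max-context rule (the span giving the token the most surrounding context "owns" it), assuming the standard BERT-style scoring used in utils_squad:

    import collections

    DocSpan = collections.namedtuple('DocSpan', ['start', 'length'])

    def is_max_context(doc_spans, cur_span_index, position):
        """True iff `position` scores best (most context) in span `cur_span_index`."""
        best_score, best_span_index = None, None
        for span_index, span in enumerate(doc_spans):
            end = span.start + span.length - 1
            if position < span.start or position > end:
                continue
            num_left_context = position - span.start
            num_right_context = end - position
            score = min(num_left_context, num_right_context) + 0.01 * span.length
            if best_score is None or score > best_score:
                best_score, best_span_index = score, span_index
        return cur_span_index == best_span_index

    # Two overlapping spans over a 250-token paragraph (hypothetical sizes):
    spans = [DocSpan(start=0, length=192), DocSpan(start=128, length=122)]
    print(is_max_context(spans, 0, 185))  # False: token 185 has more context in span 1
    print(is_max_context(spans, 1, 185))  # True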
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - unique_id: 1000000006\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - example_index: 6\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁principal ity ▁did ▁ william ▁the ▁con quer er ▁found ? [SEP] ▁the ▁nor man ▁dynasty ... ▁the ▁can ary ▁islands . [SEP]\n",
+ "[... feature arrays elided ...]\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - impossible example\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - *** Example ***\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - unique_id: 1000000007\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - example_index: 7\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n",
+ "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁branch ▁of ▁theoretical ▁computer ▁science ▁deals ▁with ▁broadly ▁classify ing ▁computational ▁problems ▁by ▁difficulty ▁and ▁class ▁of ▁relationship ? [SEP] ▁computational ▁complexity ▁theory ▁is ▁a ▁branch ▁of ▁the ▁theory ▁of ▁computation ▁in ▁theoretical ▁computer ▁science ▁that ▁focuses ▁on ▁classify ing ▁computational ▁problems ▁according ▁to ▁their ▁inherent ▁difficulty , ▁and ▁relating ▁those ▁classes ▁to ▁each ▁other . ▁a ▁computational ▁problem ▁is ▁understood ▁to ▁be ▁a ▁task ▁that ▁is ▁in ▁principle ▁a men able ▁to ▁being ▁solved ▁by ▁a ▁computer , ▁which ▁is ▁equivalent ▁to ▁stating ▁that ▁the ▁problem ▁may ▁be ▁solved ▁by ▁mechanical ▁application ▁of ▁mathematical ▁steps , ▁such ▁as ▁an ▁algorithm . 
[SEP]\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 22:0 23:1 24:2 25:3 26:4 27:5 28:6 29:7 30:8 31:9 32:10 33:11 34:12 35:13 36:14 37:15 38:16 39:17 40:18 41:18 42:19 43:20 44:21 45:22 46:23 47:24 48:25 49:25 50:26 51:27 52:28 53:29 54:30 55:31 56:32 57:32 58:33 59:34 60:35 61:36 62:37 63:38 64:39 65:40 66:41 67:42 68:43 69:44 70:45 71:46 72:46 73:46 74:47 75:48 76:49 77:50 78:51 79:52 80:52 81:53 82:54 83:55 84:56 85:57 86:58 87:59 88:60 89:61 90:62 91:63 92:64 93:65 94:66 95:67 96:68 97:69 98:69 99:70 100:71 101:72 102:73 103:73\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - token_is_max_context: 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 3709 20 13109 920 1767 4108 33 16026 27871 56 23228 708 37 6157 21 1075 20 1498 82 0 23228 11906 2818 27 24 3709 20 18 2818 20 27686 25 13109 920 1767 29 7712 31 27871 56 23228 708 549 22 58 16507 6157 19 21 7376 186 2814 22 231 86 9 24 23228 662 27 4950 22 39 24 2578 29 27 25 4926 24 1126 386 22 163 12567 37 24 920 19 59 27 4682 22 8033 29 18 662 132 39 12567 37 7820 1479 20 12956 2094 19 148 34 48 13301 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - start_position: 22\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - end_position: 24\n", + "07/18/2019 10:12:45 - INFO - cdqa.reader.utils_squad - answer: ▁computational ▁complexity ▁theory\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - unique_id: 1000000008\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - example_index: 8\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁is ▁a ▁manual ▁application ▁of ▁mathematical ▁steps ? [SEP] ▁computational ▁complexity ▁theory ▁is ▁a ▁branch ▁of ▁the ▁theory ▁of ▁computation ▁in ▁theoretical ▁computer ▁science ▁that ▁focuses ▁on ▁classify ing ▁computational ▁problems ▁according ▁to ▁their ▁inherent ▁difficulty , ▁and ▁relating ▁those ▁classes ▁to ▁each ▁other . ▁a ▁computational ▁problem ▁is ▁understood ▁to ▁be ▁a ▁task ▁that ▁is ▁in ▁principle ▁a men able ▁to ▁being ▁solved ▁by ▁a ▁computer , ▁which ▁is ▁equivalent ▁to ▁stating ▁that ▁the ▁problem ▁may ▁be ▁solved ▁by ▁mechanical ▁application ▁of ▁mathematical ▁steps , ▁such ▁as ▁an ▁algorithm . 
[SEP]\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:13 25:14 26:15 27:16 28:17 29:18 30:18 31:19 32:20 33:21 34:22 35:23 36:24 37:25 38:25 39:26 40:27 41:28 42:29 43:30 44:31 45:32 46:32 47:33 48:34 49:35 50:36 51:37 52:38 53:39 54:40 55:41 56:42 57:43 58:44 59:45 60:46 61:46 62:46 63:47 64:48 65:49 66:50 67:51 68:52 69:52 70:53 71:54 72:55 73:56 74:57 75:58 76:59 77:60 78:61 79:62 80:63 81:64 82:65 83:66 84:67 85:68 86:69 87:69 88:70 89:71 90:72 91:73 92:73\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 27 24 6403 1479 20 12956 2094 82 0 23228 11906 2818 27 24 3709 20 18 2818 20 27686 25 13109 920 1767 29 7712 31 27871 56 23228 708 549 22 58 16507 6157 19 21 7376 186 2814 22 231 86 9 24 23228 662 27 4950 22 39 24 2578 29 27 25 4926 24 1126 386 22 163 12567 37 24 920 19 59 27 4682 22 8033 29 18 662 132 39 12567 37 7820 1479 20 12956 2094 19 148 34 48 13301 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - unique_id: 1000000009\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - example_index: 9\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁measure ▁of ▁a ▁computational ▁problem ▁broadly ▁defines ▁the ▁inherent ▁difficulty ▁of ▁the ▁solution ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 17:0 18:1 19:2 20:3 21:4 22:5 23:6 24:7 25:8 26:9 27:10 28:11 29:12 30:12 31:13 32:14 33:15 34:16 35:16 36:17 37:18 38:19 39:19 40:20 41:21 42:21 43:22 44:23 45:24 46:25 47:26 48:27 49:28 50:29 51:30 52:31 53:32 54:33 55:33 56:34 57:35 58:36 59:37 60:38 61:39 62:40 63:41 64:41 65:42 66:43 67:44 68:45 69:46 70:46 71:47 72:48 73:49 74:50 75:51 76:52 77:52 78:53 79:54 80:55 81:56 82:57 83:58 84:59 85:59 86:59 87:60 88:61 89:62 90:62 91:62 92:63 93:64 94:65 95:66 96:67 97:68 98:69 99:70 100:70 101:70 102:71 103:72 104:73 105:73 106:74 107:75 108:76 109:77 110:78 111:79 112:79 113:79 114:80 115:81 116:82 117:82 118:82 119:83 120:84 121:85 122:86 123:87 124:88 125:89 126:90 127:91 128:92 129:93 130:94 131:95 132:96 133:97 134:98 135:99 136:100 137:101 138:102 139:103 140:103\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_is_max_context: 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2310 20 24 23228 662 16026 14668 18 16507 6157 20 18 1938 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - start_position: 24\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - end_position: 29\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - answer: ▁if ▁its ▁solution ▁requires ▁significant ▁resources\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - unique_id: 1000000010\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - example_index: 10\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁method ▁is ▁used ▁to ▁in tu itive ly ▁assess ▁or ▁quantify ▁the ▁amount ▁of ▁resources ▁required ▁to ▁solve ▁a ▁computational ▁problem ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 25:0 26:1 27:2 28:3 29:4 30:5 31:6 32:7 33:8 34:9 35:10 36:11 37:12 38:12 39:13 40:14 41:15 42:16 43:16 44:17 45:18 46:19 47:19 48:20 49:21 50:21 51:22 52:23 53:24 54:25 55:26 56:27 57:28 58:29 59:30 60:31 61:32 62:33 63:33 64:34 65:35 66:36 67:37 68:38 69:39 70:40 71:41 72:41 73:42 74:43 75:44 76:45 77:46 78:46 79:47 80:48 81:49 82:50 83:51 84:52 85:52 86:53 87:54 88:55 89:56 90:57 91:58 92:59 93:59 94:59 95:60 96:61 97:62 98:62 99:62 100:63 101:64 102:65 103:66 104:67 105:68 106:69 107:70 108:70 109:70 110:71 111:72 112:73 113:73 114:74 115:75 116:76 117:77 118:78 119:79 120:79 121:79 122:80 123:81 124:82 125:82 126:82 127:83 128:84 129:85 130:86 131:87 132:88 133:89 134:90 135:91 136:92 137:93 138:94 139:95 140:96 141:97 142:98 143:99 144:100 145:101 146:102 147:103 148:103\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_is_max_context: 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 2175 27 179 22 25 2853 9736 111 7329 49 30299 18 1065 20 1485 978 22 4929 24 23228 662 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - start_position: 53\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - end_position: 56\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - answer: ▁mathematical ▁models ▁of ▁computation\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - unique_id: 1000000011\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - example_index: 11\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁are ▁two ▁basic ▁primary ▁resources ▁used ▁to ▁ gu age ▁complexity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:1 17:2 18:3 19:4 20:5 21:6 22:7 23:8 24:9 25:10 26:11 27:12 28:12 29:13 30:14 31:15 32:16 33:16 34:17 35:18 36:19 37:19 38:20 39:21 40:21 41:22 42:23 43:24 44:25 45:26 46:27 47:28 48:29 49:30 50:31 51:32 52:33 53:33 54:34 55:35 56:36 57:37 58:38 59:39 60:40 61:41 62:41 63:42 64:43 65:44 66:45 67:46 68:46 69:47 70:48 71:49 72:50 73:51 74:52 75:52 76:53 77:54 78:55 79:56 80:57 81:58 82:59 83:59 84:59 85:60 86:61 87:62 88:62 89:62 90:63 91:64 92:65 93:66 94:67 95:68 96:69 97:70 98:70 99:70 100:71 101:72 102:73 103:73 104:74 105:75 106:76 107:77 108:78 109:79 110:79 111:79 112:80 113:81 114:82 115:82 116:82 117:83 118:84 119:85 120:86 121:87 122:88 123:89 124:90 125:91 126:92 127:93 128:94 129:95 130:96 131:97 132:98 133:99 134:100 135:101 136:102 137:103 138:103\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 41 87 1949 1827 1485 179 22 17 3017 981 11906 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - start_position: 65\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - end_position: 67\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - answer: ▁time ▁and ▁storage\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - unique_id: 1000000012\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - example_index: 12\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁unit ▁is ▁measured ▁to ▁determine ▁circuit ▁simplicity ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 11:0 12:1 13:2 14:3 15:4 16:5 17:6 18:7 19:8 20:9 21:10 22:11 23:12 24:12 25:13 26:14 27:15 28:16 29:16 30:17 31:18 32:19 33:19 34:20 35:21 36:21 37:22 38:23 39:24 40:25 41:26 42:27 43:28 44:29 45:30 46:31 47:32 48:33 49:33 50:34 51:35 52:36 53:37 54:38 55:39 56:40 57:41 58:41 59:42 60:43 61:44 62:45 63:46 64:46 65:47 66:48 67:49 68:50 69:51 70:52 71:52 72:53 73:54 74:55 75:56 76:57 77:58 78:59 79:59 80:59 81:60 82:61 83:62 84:62 85:62 86:63 87:64 88:65 89:66 90:67 91:68 92:69 93:70 94:70 95:70 96:71 97:72 98:73 99:73 100:74 101:75 102:76 103:77 104:78 105:79 106:79 107:79 108:80 109:81 110:82 111:82 112:82 113:83 114:84 115:85 116:86 117:87 118:88 119:89 120:90 121:91 122:92 123:93 124:94 125:95 126:96 127:97 128:98 129:99 130:100 131:101 132:102 133:103 134:103\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_is_max_context: 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 1591 27 7375 22 2081 5034 18950 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - unique_id: 1000000013\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - example_index: 13\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁what ▁number ▁is ▁used ▁in ▁perpendicular ▁computing ? [SEP] ▁a ▁problem ▁is ▁regarded ▁as ▁inherently ▁difficult ▁if ▁its ▁solution ▁requires ▁significant ▁resources , ▁whatever ▁the ▁algorithm ▁used . ▁the ▁theory ▁formal izes ▁this ▁intuition , ▁by ▁introducing ▁mathematical ▁models ▁of ▁computation ▁to ▁study ▁these ▁problems ▁and ▁quantify ing ▁the ▁amount ▁of ▁resources ▁needed ▁to ▁solve ▁them , ▁such ▁as ▁time ▁and ▁storage . ▁other ▁complexity ▁measures ▁are ▁also ▁used , ▁such ▁as ▁the ▁amount ▁of ▁communication ▁ ( used ▁in ▁communication ▁complexity ) , ▁the ▁number ▁of ▁gates ▁in ▁a ▁circuit ▁ ( used ▁in ▁circuit ▁complexity ) ▁and ▁the ▁number ▁of ▁processors ▁ ( used ▁in ▁parallel ▁computing ) . ▁one ▁of ▁the ▁roles ▁of ▁computational ▁complexity ▁theory ▁is ▁to ▁determine ▁the ▁practical ▁limits ▁on ▁what ▁computers ▁can ▁and ▁cannot ▁do . 
[SEP]\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7 18:8 19:9 20:10 21:11 22:12 23:12 24:13 25:14 26:15 27:16 28:16 29:17 30:18 31:19 32:19 33:20 34:21 35:21 36:22 37:23 38:24 39:25 40:26 41:27 42:28 43:29 44:30 45:31 46:32 47:33 48:33 49:34 50:35 51:36 52:37 53:38 54:39 55:40 56:41 57:41 58:42 59:43 60:44 61:45 62:46 63:46 64:47 65:48 66:49 67:50 68:51 69:52 70:52 71:53 72:54 73:55 74:56 75:57 76:58 77:59 78:59 79:59 80:60 81:61 82:62 83:62 84:62 85:63 86:64 87:65 88:66 89:67 90:68 91:69 92:70 93:70 94:70 95:71 96:72 97:73 98:73 99:74 100:75 101:76 102:77 103:78 104:79 105:79 106:79 107:80 108:81 109:82 110:82 111:82 112:83 113:84 114:85 115:86 116:87 117:88 118:89 119:90 120:91 121:92 122:93 123:94 124:95 125:96 126:97 127:98 128:99 129:100 130:101 131:102 132:103 133:103\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - token_is_max_context: 10:True 11:True 12:True 13:True 14:True 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_ids: 0 113 243 27 179 25 30525 9848 82 0 24 662 27 5520 34 26163 1132 108 81 1938 2543 1376 1485 19 2636 18 13301 179 9 18 2818 3279 17132 52 27069 19 37 11707 12956 2626 20 27686 22 757 166 708 21 30299 56 18 1065 20 1485 790 22 4929 107 19 148 34 92 21 3386 9 86 11906 1858 41 77 179 19 148 34 18 1065 20 3056 17 10 10583 25 3056 11906 11 19 18 243 20 11545 25 24 5034 17 10 10583 25 5034 11906 11 21 18 243 20 18629 17 10 10583 25 5945 9848 11 9 65 20 18 4779 20 23228 11906 2818 27 22 2081 18 4224 4340 31 113 3668 64 21 977 112 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.utils_squad - impossible example\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.reader_sklearn - Saving features into cached file cached_train_xlnet-base-cased_384\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.reader_sklearn - ***** Running training *****\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.reader_sklearn - Num examples = 14\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.reader_sklearn - Num Epochs = 3\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.reader_sklearn - Instantaneous batch size per GPU = 8\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.reader_sklearn - Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.reader_sklearn - Gradient Accumulation steps = 1\n", + "07/18/2019 10:12:46 - INFO - cdqa.reader.reader_sklearn - Total optimization steps = 6\n", "Epoch: 0%| | 0/3 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
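The feature logs above follow one fixed recipe: every (question, context) pair is flattened into equal-length input_ids, input_mask and segment_ids arrays before training. A minimal sketch of that layout, assuming the [CLS] question [SEP] context [SEP] ordering visible in the tokens lines and the max_seq_length of 384 implied by the cache file name; the helper name and the special-token ids are illustrative placeholders, not the actual cdqa/pytorch-transformers implementation:

    # Illustrative sketch only: how the logged fields fit together.
    # cls_id, sep_id and pad_id are placeholder ids, not XLNet's real ones;
    # max_seq_length=384 matches cached_train_xlnet-base-cased_384 above.
    def build_features(question_ids, context_ids, max_seq_length=384,
                       cls_id=0, sep_id=1, pad_id=0):
        # Layout seen in the `tokens` lines: [CLS] question [SEP] context [SEP]
        input_ids = [cls_id] + question_ids + [sep_id] + context_ids + [sep_id]
        # segment_ids: 0 over the question segment, 1 over the context segment
        segment_ids = [0] * (2 + len(question_ids)) + [1] * (len(context_ids) + 1)
        # input_mask: 1 for real tokens, 0 for the padding appended below
        input_mask = [1] * len(input_ids)
        padding = max_seq_length - len(input_ids)
        input_ids += [pad_id] * padding
        input_mask += [0] * padding
        segment_ids += [0] * padding  # padding also gets segment id 0
        return input_ids, input_mask, segment_ids

The training summary is consistent with these numbers: 14 features at a per-GPU batch size of 8 with no gradient accumulation give ceil(14 / 8) = 2 optimization steps per epoch, hence the 6 total steps logged over 3 epochs.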
   date        title                                               category    link                                                abstract                                            paragraphs
0  13.05.2019  The banking jobs : Assistant Vice President – ...  Careers     https://group.bnpparibas/en/news/banking-jobs-...  Within the Group’s Corporate and Institutional...  [I manage a team in charge of designing and im...
1  13.05.2019  BNP Paribas at #VivaTech : discover the progra...  Innovation  https://group.bnpparibas/en/news/bnp-paribas-v...  From Thursday 16 to Saturday 18 May 2019, join...  [With François Hollande, Chairman of French fo...
2  13.05.2019  "The bank with an IT budget of more than EUR6 ...  Group       https://group.bnpparibas/en/news/the-bank-budg...  Interview with Jean-Laurent Bonnafé, Director ...  [We did the groundwork between 2012 and 2016, ...
3  10.05.2019  BNP Paribas at #VivaTech : discover the progra...  Innovation  https://group.bnpparibas/en/news/bnp-paribas-v...  From Thursday 16 to Saturday 18 May 2019, join...  [As part of the ‘United Tech of Europe’ theme,...
4  10.05.2019  When Artificial Intelligence participates in r...  Careers     https://group.bnpparibas/en/news/artificial-in...  As the competition to attract talent intensifi...  [Online recruitment is already the norm. Accor...
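These five articles are the corpus the retriever cell below works from: each article's paragraphs are joined into a content column, a TfidfRetriever is fitted on it, and the articles are ranked against the query. As a rough sketch of what that ranking amounts to, written against plain scikit-learn rather than cdqa's own TfidfRetriever (rank_documents is an illustrative helper, not part of the library):

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    def rank_documents(query, contents, top_n=3):
        # Fit a TF-IDF model on the article bodies, then embed the query in it
        vectorizer = TfidfVectorizer()
        doc_matrix = vectorizer.fit_transform(contents)
        query_vec = vectorizer.transform([query])
        # Rank articles by cosine similarity to the query, best match first
        scores = cosine_similarity(query_vec, doc_matrix).ravel()
        return scores.argsort()[::-1][:top_n]

Run against the content column above, a ranking of this kind is what produces the best-matches table printed by retriever.predict in the cell that follows.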
\n", - "" - ], - "text/plain": [ - " date ... paragraphs\n", - "0 13.05.2019 ... [I manage a team in charge of designing and im...\n", - "1 13.05.2019 ... [With François Hollande, Chairman of French fo...\n", - "2 13.05.2019 ... [We did the groundwork between 2012 and 2016, ...\n", - "3 10.05.2019 ... [As part of the ‘United Tech of Europe’ theme,...\n", - "4 10.05.2019 ... [Online recruitment is already the norm. Accor...\n", - "\n", - "[5 rows x 6 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 14 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "rBljRU1gaJ8l", - "colab_type": "code", - "colab": {} - }, - "source": [ - "query = 'Since when does the Excellence Program of BNP Paribas exist?'" + "df = filter_paragraphs(df)" ], "execution_count": 0, "outputs": [] @@ -1205,25 +835,38 @@ "metadata": { "id": "KQ9Be2rzZYQb", "colab_type": "code", - "outputId": "adc9b8e7-ed08-4412-8c71-20cad7db97ec", + "outputId": "bb87f011-d2a7-450d-afb6-52a3f42bc6ad", "colab": { "base_uri": "https://localhost:8080/", - "height": 172 + "height": 191 } }, "source": [ "from cdqa.utils.converters import generate_squad_examples\n", "from cdqa.retriever.tfidf_sklearn import TfidfRetriever\n", "\n", + "query = 'Since when does the Excellence Program of BNP Paribas exist?'\n", + "\n", "metadata = df\n", "metadata['content'] = metadata['paragraphs'].apply(lambda x: ' '.join(x))\n", "\n", "retriever = TfidfRetriever(verbose=True)\n", "retriever.fit(metadata['content'])\n", - "closest_docs_indices = retriever.predict(query, metadata=metadata)" + "closest_docs_indices = retriever.predict(query, metadata=metadata)\n", + "\n", + "squad_examples = generate_squad_examples(question=query,\n", + " closest_docs_indices=closest_docs_indices,\n", + " metadata=metadata)" ], - "execution_count": 16, + "execution_count": 12, "outputs": [ + { + "output_type": "stream", + "text": [ + "3it [00:00, 1480.69it/s]" + ], + "name": "stderr" + }, { "output_type": "stream", "text": [ @@ -1234,34 +877,14 @@ "| 2 | 146 | BNP Paribas Graduate Programs in France |\n", "| 3 | 881 | Making the most of your VIE! |\n", "+------+-------+-----------------------------------------------------+\n", - "Time: 0.00622 seconds\n" + "Time: 0.0099 seconds\n" ], "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "EIAlFnv_aLO_", - "colab_type": "code", - "outputId": "0af067b3-13b7-45ca-8aaa-de972f122bdf", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 36 - } - }, - "source": [ - "squad_examples = generate_squad_examples(question=query,\n", - " closest_docs_indices=closest_docs_indices,\n", - " metadata=metadata)" - ], - "execution_count": 17, - "outputs": [ + }, { "output_type": "stream", "text": [ - "3it [00:00, 924.13it/s]\n" + "\n" ], "name": "stderr" } @@ -1270,141 +893,32 @@ { "cell_type": "code", "metadata": { - "id": "WEJaWWo3cRib", + "id": "zfXr97ragvd7", "colab_type": "code", - "outputId": "78f9962c-1595-4ef8-bfa2-5e7d431cdc05", "colab": { "base_uri": "https://localhost:8080/", - "height": 947 - } - }, - "source": [ - "squad_examples" - ], - "execution_count": 18, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[{'paragraphs': [{'context': 'BNP Paribas has long maintained a strong relationship with the academic world and target schools in order to attract its future talent, whether it be universities, business schools or engineering schools. The Group serves as a committed partner of the academic world. 
It also plays a role in curriculum planning by updating coursework so that it matches actual business needs as closely as possible.',\n", - " 'qas': [{'answers': [],\n", - " 'id': '100e1c8e-69f7-4f4b-9f3c-936f33bcc71e',\n", - " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", - " {'context': 'To promote the bank’s businesses among students and recruit high-potential candidates or future talent, the BNP Paribas Group relies on its Campus Management team, which ensures that the bank’s initiatives carried out with certain schools in France match the needs of its businesses, departments and subsidiaries. In other words, the role of Campus Management is to develop quality relationships with students and promote the Group’s businesses. In the words of Jean-Dominique Criscuolo, Manager of Partnerships and Relations with Schools and Universities, “We strongly believe that our Academic Relations play a growing role in transforming our Group.”',\n", - " 'qas': [{'answers': [],\n", - " 'id': 'cfac01a5-98eb-4d76-8066-adb63e24751c',\n", - " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", - " {'context': 'According to Jean-Dominique Criscuolo, \"BNP Paribas comprises some 300 businesses, some of which only emerged within the last two years. Including data scientists, agile coaches, and IT inspectors, the new professions created by the digital transformation broaden the palette of the Group’s traditional businesses. In a changing world, governed by rapidly evolving international financial regulations, many new opportunities are available within our Compliance teams. This is a dynamic profession which, due to regulatory, geopolitical and societal changes, plays an increasingly central role in the crossroads of strategy and the daily actions of the bank and its customers.”',\n", - " 'qas': [{'answers': [],\n", - " 'id': 'dbba50ec-f907-46ed-9f8a-e106b17585f5',\n", - " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", - " {'context': 'This large-scale project will further expand in coming years, notably internationally. Gilles Deschanel, in charge of academic partnerships at BNP Paribas CIB, observes that “this partnership aims to become one of the leading research bodies in this immensely disruptive technology. It will also play a role in transforming the financial sector through publications and major events like VivaTech.”',\n", - " 'qas': [{'answers': [],\n", - " 'id': '4cbf8827-c3a9-42c2-9bd2-fd3ca38fc2e1',\n", - " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", - " {'context': 'Since January 2016, BNP Paribas has offered an Excellence Program targeting new Master’s level graduates (BAC+5) who show high potential. The aid program lasts 18 months and comprises three assignments of six months each. It serves as a strong career accelerator that enables participants to access high-level management positions at a faster rate. 
The program allows participants to discover the BNP Paribas Group and its various entities in France and abroad, build an internal and external network by working on different assignments and receive personalized assistance from a mentor and coaching firm at every step along the way.',\n", - " 'qas': [{'answers': [],\n", - " 'id': '9b1e4395-6554-4fba-bd80-cd9639a687f4',\n", - " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]}],\n", - " 'title': 'BNP Paribas’ commitment to universities and schools'},\n", - " {'paragraphs': [{'context': 'Looking to kickstart your career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’ Graduate Programs. These challenging 18-month programs spent in different operational functions enable participants to expand their banking knowledge, skills and professional networks. Learn more about this promising initiative.',\n", - " 'qas': [{'answers': [],\n", - " 'id': '2e266853-ed16-4fce-9701-a5d5c7005b80',\n", - " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", - " {'context': 'Convinced that recent and future graduates represent the future of the Group, BNP Paribas is counting on their talent to maintain its edge in the market. As it works to build the future of banking, the Group is now putting in place HR measures designed to attract future talent. The Graduate Programs are one example: these recruiting programs allow selected candidates to join several operational functions for 18 months, while benefiting from personalized HR support.',\n", - " 'qas': [{'answers': [],\n", - " 'id': '56d2240c-0d26-4534-bf99-87aecec38523',\n", - " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", - " {'context': 'Hired immediately through long-term contracts, participants in each pathway complete an immersive and personalized curriculum composed of three professional development assignments, each lasting six months. The rotation is co-constructed based on the needs of the bank’s businesses and the skills or interests of each participant. As full team members, participants quickly gain experience and specific skills by working directly with their peers and senior employees in the entity. As soon as they are hired, participants also become members of a “trainee class” to promote joint development, experience-sharing and to help build skills as a group. Recruiting programs are open to candidates of all nationalities, though they take place in France (with some exceptions).',\n", - " 'qas': [{'answers': [],\n", - " 'id': '719ec8c6-fd2b-4ee5-b4b5-9c77b6f7e27c',\n", - " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", - " {'context': 'The programs seek a wide range of candidates. While Master’s (Bac+5) graduates in math, finance, economics, science, business, engineering and computer science, showing strong analytical skills and mathematical abilities, can apply for the Excellence Program, digital and data-oriented candidates are invited to join the Digital Path. The Digital Path encourages a passionate mindset and a “digital explorer” mentality, rather than a specific degree. 
In this way, the Group aims to develop an internal mindset focused on digital innovation, notably by integrating the following candidates:',\n", - " 'qas': [{'answers': [],\n", - " 'id': '533f1412-6289-4a83-bb96-efbae2ba508c',\n", - " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]},\n", - " {'context': 'For all programs, initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have a specific end goal in mind for the program—based on their preferences and the opportunities available with each business, participants can co-construct their pathway with HR teams as they progress through the program.',\n", - " 'qas': [{'answers': [],\n", - " 'id': '062d4586-93bf-4ab3-b03f-f15aa89f39e8',\n", - " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]}],\n", - " 'title': 'BNP Paribas Graduate Programs in France'},\n", - " {'paragraphs': [{'context': 'BNP Paribas recruits new graduates to fulfill assignments lasting up to 16 months by joining the Group through one of its international subsidiaries. What types of positions are available through the VIE program? What destinations does it offer? What types of applicants do we look for? Let’s take a closer look at the international corporate volunteer program, which provides a chance to launch your international career in a position with real responsibilities.',\n", - " 'qas': [{'answers': [],\n", - " 'id': '60844039-fbfb-48ba-bd8a-824c0fc36935',\n", - " 'question': 'Since when does the Excellence Program of BNP Paribas exist?'}]}],\n", - " 'title': 'Making the most of your VIE!'}]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 18 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "qRwGqhHjXPeb", - "colab_type": "code", - "outputId": "fa38b3a4-25b1-483f-aa43-16be44ef3c21", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 947 - } + "height": 1000 + }, + "outputId": "03a5d848-2851-4b6c-8ba9-e00df02484dd" }, "source": [ - "reader = Reader(train_file='dev-v2.0-small.json',\n", - " predict_file=squad_examples,\n", - " model_type='xlnet',\n", + "# cast Reader class with train params\n", + "reader = Reader(model_type='xlnet',\n", " model_name_or_path='xlnet-base-cased',\n", " fp16=False,\n", - " output_dir='.')" + " output_dir='.')\n", + "\n", + "# train the model\n", + "out_eval, final_prediction = reader.predict(X=squad_examples)" ], - "execution_count": 19, + "execution_count": 13, "outputs": [ { "output_type": "stream", "text": [ - "07/18/2019 09:00:24 - WARNING - cdqa.reader.reader_sklearn - Process rank: -1, device: cpu, n_gpu: 1, distributed training: False, 16-bits training: False\n", - "07/18/2019 09:00:25 - INFO - pytorch_transformers.modeling_utils - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json from cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", - "07/18/2019 09:00:25 - INFO - pytorch_transformers.modeling_utils - Model config {\n", - " \"attn_type\": \"bi\",\n", - " \"bi_data\": false,\n", - " \"clamp_len\": -1,\n", - " \"d_head\": 64,\n", - " \"d_inner\": 3072,\n", - " \"d_model\": 768,\n", - " \"dropout\": 0.1,\n", - " \"end_n_top\": 5,\n", - " \"ff_activation\": \"gelu\",\n", - " \"finetuning_task\": null,\n", - " \"initializer_range\": 
0.02,\n", - " \"layer_norm_eps\": 1e-12,\n", - " \"mem_len\": null,\n", - " \"n_head\": 12,\n", - " \"n_layer\": 12,\n", - " \"n_token\": 32000,\n", - " \"num_labels\": 2,\n", - " \"output_attentions\": false,\n", - " \"output_hidden_states\": false,\n", - " \"reuse_len\": null,\n", - " \"same_length\": false,\n", - " \"start_n_top\": 5,\n", - " \"summary_activation\": \"tanh\",\n", - " \"summary_last_dropout\": 0.1,\n", - " \"summary_type\": \"last\",\n", - " \"summary_use_proj\": true,\n", - " \"torchscript\": false,\n", - " \"untie_r\": true\n", - "}\n", - "\n", - "07/18/2019 09:00:26 - INFO - pytorch_transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", - "07/18/2019 09:00:27 - INFO - pytorch_transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin from cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", - "07/18/2019 09:00:33 - INFO - pytorch_transformers.modeling_utils - Weights of XLNetForQuestionAnswering not initialized from pretrained model: ['start_logits.dense.weight', 'start_logits.dense.bias', 'end_logits.dense_0.weight', 'end_logits.dense_0.bias', 'end_logits.LayerNorm.weight', 'end_logits.LayerNorm.bias', 'end_logits.dense_1.weight', 'end_logits.dense_1.bias', 'answer_class.dense_0.weight', 'answer_class.dense_0.bias', 'answer_class.dense_1.weight']\n", - "07/18/2019 09:00:33 - INFO - pytorch_transformers.modeling_utils - Weights from pretrained model not used in XLNetForQuestionAnswering: ['lm_loss.weight', 'lm_loss.bias']\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.reader_sklearn - Training/evaluation parameters Reader(adam_epsilon=1e-08, cache_dir='', config_name='', do_eval=None,\n", + "07/18/2019 10:19:31 - WARNING - cdqa.reader.reader_sklearn - Process rank: -1, device: cpu, n_gpu: 1, distributed training: False, 16-bits training: False\n", + "07/18/2019 10:19:32 - INFO - pytorch_transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.reader_sklearn - Training/evaluation parameters Reader(adam_epsilon=1e-08, cache_dir='', config_name='', do_eval=None,\n", " do_lower_case=True, do_train=None, doc_stride=128,\n", " eval_all_checkpoints=True, evaluate_during_training=True, fp16=False,\n", " fp16_opt_level='O1', gradient_accumulation_steps=1, learning_rate=5e-05,\n", @@ -1413,278 +927,193 @@ " model_name_or_path='xlnet-base-cased', model_type='xlnet',\n", " n_best_size=20, no_cuda=True, null_score_diff_threshold=0.0,\n", " num_train_epochs=3.0, output_dir='.', overwrite_cache=True,\n", - " overwrite_output_dir=True, per_gpu_eval_batch_size=8, ...)\n" + " overwrite_output_dir=True, per_gpu_eval_batch_size=8, ...)\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at [{'title': 'BNP Paribas’ commitment to universities and schools', 'paragraphs': 
[{'context': 'BNP Paribas has long maintained a strong relationship with the academic world and target schools in order to attract its future talent, whether it be universities, business schools or engineering schools. The Group serves as a committed partner of the academic world. It also plays a role in curriculum planning by updating coursework so that it matches actual business needs as closely as possible.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': 'e9acc119-3bb7-4d05-8add-26b253e9a553'}]}, {'context': 'To promote the bank’s businesses among students and recruit high-potential candidates or future talent, the BNP Paribas Group relies on its Campus Management team, which ensures that the bank’s initiatives carried out with certain schools in France match the needs of its businesses, departments and subsidiaries. In other words, the role of Campus Management is to develop quality relationships with students and promote the Group’s businesses. In the words of Jean-Dominique Criscuolo, Manager of Partnerships and Relations with Schools and Universities, “We strongly believe that our Academic Relations play a growing role in transforming our Group.”', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '8a56fbff-1f86-4738-9338-87cf22936029'}]}, {'context': 'According to Jean-Dominique Criscuolo, \"BNP Paribas comprises some 300 businesses, some of which only emerged within the last two years. Including data scientists, agile coaches, and IT inspectors, the new professions created by the digital transformation broaden the palette of the Group’s traditional businesses. In a changing world, governed by rapidly evolving international financial regulations, many new opportunities are available within our Compliance teams. This is a dynamic profession which, due to regulatory, geopolitical and societal changes, plays an increasingly central role in the crossroads of strategy and the daily actions of the bank and its customers.”', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '867d535a-716a-4607-93c8-5de67c2a2e4d'}]}, {'context': 'This large-scale project will further expand in coming years, notably internationally. Gilles Deschanel, in charge of academic partnerships at BNP Paribas CIB, observes that “this partnership aims to become one of the leading research bodies in this immensely disruptive technology. It will also play a role in transforming the financial sector through publications and major events like VivaTech.”', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '1da5a3f2-a476-4d8b-b705-7c56e1449a8e'}]}, {'context': 'Since January 2016, BNP Paribas has offered an Excellence Program targeting new Master’s level graduates (BAC+5) who show high potential. The aid program lasts 18 months and comprises three assignments of six months each. It serves as a strong career accelerator that enables participants to access high-level management positions at a faster rate. 
The program allows participants to discover the BNP Paribas Group and its various entities in France and abroad, build an internal and external network by working on different assignments and receive personalized assistance from a mentor and coaching firm at every step along the way.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '0ccabb70-e45d-41be-bc78-738d58fcd715'}]}]}, {'title': 'BNP Paribas Graduate Programs in France', 'paragraphs': [{'context': 'Looking to kickstart your career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’ Graduate Programs. These challenging 18-month programs spent in different operational functions enable participants to expand their banking knowledge, skills and professional networks. Learn more about this promising initiative.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '9039734f-3117-40a8-bf64-2d367a1e0b29'}]}, {'context': 'Convinced that recent and future graduates represent the future of the Group, BNP Paribas is counting on their talent to maintain its edge in the market. As it works to build the future of banking, the Group is now putting in place HR measures designed to attract future talent. The Graduate Programs are one example: these recruiting programs allow selected candidates to join several operational functions for 18 months, while benefiting from personalized HR support.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '0c766701-06f6-46ae-9759-893f6323682c'}]}, {'context': 'Hired immediately through long-term contracts, participants in each pathway complete an immersive and personalized curriculum composed of three professional development assignments, each lasting six months. The rotation is co-constructed based on the needs of the bank’s businesses and the skills or interests of each participant. As full team members, participants quickly gain experience and specific skills by working directly with their peers and senior employees in the entity. As soon as they are hired, participants also become members of a “trainee class” to promote joint development, experience-sharing and to help build skills as a group. Recruiting programs are open to candidates of all nationalities, though they take place in France (with some exceptions).', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '09297116-c1fe-40fc-a6cd-60a41cb90277'}]}, {'context': 'The programs seek a wide range of candidates. While Master’s (Bac+5) graduates in math, finance, economics, science, business, engineering and computer science, showing strong analytical skills and mathematical abilities, can apply for the Excellence Program, digital and data-oriented candidates are invited to join the Digital Path. The Digital Path encourages a passionate mindset and a “digital explorer” mentality, rather than a specific degree. In this way, the Group aims to develop an internal mindset focused on digital innovation, notably by integrating the following candidates:', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '5be506cb-b25c-400b-b4af-88bbd70a0410'}]}, {'context': 'For all programs, initiative, determination and curiosity are essential qualities. 
Participants should also be committed team players eager to tackle collaborative work. Candidates need not have a specific end goal in mind for the program—based on their preferences and the opportunities available with each business, participants can co-construct their pathway with HR teams as they progress through the program.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '8821ce45-6538-4a4e-8d95-221333894d6c'}]}]}, {'title': 'Making the most of your VIE!', 'paragraphs': [{'context': 'BNP Paribas recruits new graduates to fulfill assignments lasting up to 16 months by joining the Group through one of its international subsidiaries. What types of positions are available through the VIE program? What destinations does it offer? What types of applicants do we look for? Let’s take a closer look at the international corporate volunteer program, which provides a chance to launch your international career in a position with real responsibilities.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '06650437-ae3c-4f67-a9cb-607e814a7daf'}]}]}]\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - example_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁ b n p ▁ pari bas ▁has ▁long ▁maintained ▁a ▁strong ▁relationship ▁with ▁the ▁academic ▁world ▁and ▁target ▁schools ▁in ▁order ▁to ▁attract ▁its ▁future ▁talent , ▁whether ▁it ▁be ▁universities , ▁business ▁schools ▁or ▁engineering ▁schools . ▁the ▁group ▁serves ▁as ▁a ▁committed ▁partner ▁of ▁the ▁academic ▁world . ▁it ▁also ▁plays ▁a ▁role ▁in ▁curriculum ▁planning ▁by ▁updating ▁course work ▁so ▁that ▁it ▁matches ▁actual ▁business ▁needs ▁as ▁closely ▁as ▁possible . 
[SEP]\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:0 20:0 21:0 22:1 23:1 24:1 25:2 26:3 27:4 28:5 29:6 30:7 31:8 32:9 33:10 34:11 35:12 36:13 37:14 38:15 39:16 40:17 41:18 42:19 43:20 44:21 45:21 46:22 47:23 48:24 49:25 50:25 51:26 52:27 53:28 54:29 55:30 56:30 57:31 58:32 59:33 60:34 61:35 62:36 63:37 64:38 65:39 66:40 67:41 68:41 69:42 70:43 71:44 72:45 73:46 74:47 75:48 76:49 77:50 78:51 79:52 80:52 81:53 82:54 83:55 84:56 85:57 86:58 87:59 88:60 89:61 90:62 91:63 92:63\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 17 508 180 450 17 21605 7522 51 206 4109 24 737 1498 33 18 2550 185 21 1983 1326 25 374 22 4964 81 623 4738 19 548 36 39 5536 19 264 1326 49 3814 1326 9 18 256 3697 34 24 2362 2229 20 18 2550 185 9 36 77 2254 24 682 25 8400 1777 37 19520 477 3552 102 29 36 2466 2746 264 794 34 3126 34 498 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - unique_id: 1000000001\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - example_index: 1\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁to ▁promote ▁the ▁bank ’ s ▁businesses ▁among ▁students ▁and ▁recruit ▁high - pot ential ▁candidates ▁or ▁future ▁talent , ▁the ▁ b n p ▁ pari bas ▁group ▁relies ▁on ▁its ▁campus ▁management ▁team , ▁which ▁ensures ▁that ▁the ▁bank ’ s ▁initiatives ▁carried ▁out ▁with ▁certain ▁schools ▁in ▁ franc e ▁match ▁the ▁needs ▁of ▁its ▁businesses , ▁departments ▁and ▁subsidiaries . ▁in ▁other ▁words , ▁the ▁role ▁of ▁campus ▁management ▁is ▁to ▁develop ▁quality ▁relationships ▁with ▁students ▁and ▁promote ▁the ▁group ’ s ▁businesses . ▁in ▁the ▁words ▁of ▁ je an - dom in ique ▁ cri scu olo , ▁manager ▁of ▁partnerships ▁and ▁relations ▁with ▁schools ▁and ▁universities , ▁“ we ▁strongly ▁believe ▁that ▁our ▁academic ▁relations ▁play ▁a ▁growing ▁role ▁in ▁transforming ▁our ▁group . ” [SEP]\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:1 20:2 21:3 22:3 23:3 24:4 25:5 26:6 27:7 28:8 29:9 30:9 31:9 32:9 33:10 34:11 35:12 36:13 37:13 38:14 39:15 40:15 41:15 42:15 43:16 44:16 45:16 46:17 47:18 48:19 49:20 50:21 51:22 52:23 53:23 54:24 55:25 56:26 57:27 58:28 59:28 60:28 61:29 62:30 63:31 64:32 65:33 66:34 67:35 68:36 69:36 70:36 71:37 72:38 73:39 74:40 75:41 76:42 77:42 78:43 79:44 80:45 81:45 82:46 83:47 84:48 85:48 86:49 87:50 88:51 89:52 90:53 91:54 92:55 93:56 94:57 95:58 96:59 97:60 98:61 99:62 100:63 101:64 102:64 103:64 104:65 105:65 106:66 107:67 108:68 109:69 110:70 111:70 112:70 113:70 114:70 115:70 116:70 117:71 118:71 119:71 120:71 121:71 122:72 123:73 124:74 125:75 126:76 127:77 128:78 129:79 130:80 131:80 132:81 133:81 134:82 135:83 136:84 137:85 138:86 139:87 140:88 141:89 142:90 143:91 144:92 145:93 146:94 147:95 148:95 149:95\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 
131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 22 2573 18 1013 165 23 1812 447 466 21 10499 227 13 7111 12046 2338 49 623 4738 19 18 17 508 180 450 17 21605 7522 256 15429 31 81 3344 988 230 19 59 16554 29 18 1013 165 23 7750 1708 78 33 1028 1326 25 17 12786 93 854 18 794 20 81 1812 19 7023 21 19328 9 25 86 1006 19 18 682 20 3344 988 27 22 1627 882 4443 33 466 21 2573 18 256 165 23 1812 9 25 18 1006 20 17 2554 262 13 5173 153 8911 17 6752 9336 8292 19 1416 20 13501 21 1704 33 1326 21 5536 19 221 1603 3877 676 29 120 2550 1704 354 24 1358 682 25 20775 120 256 9 407 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - unique_id: 1000000002\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - example_index: 2\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? 
[SEP] ▁according ▁to ▁ je an - dom in ique ▁ cri scu olo , ▁ \" b n p ▁ pari bas ▁comprises ▁some ▁300 ▁businesses , ▁some ▁of ▁which ▁only ▁emerged ▁within ▁the ▁last ▁two ▁years . ▁including ▁data ▁scientists , ▁agile ▁coaches , ▁and ▁it ▁inspectors , ▁the ▁new ▁profession s ▁created ▁by ▁the ▁digital ▁transformation ▁broaden ▁the ▁palette ▁of ▁the ▁group ’ s ▁traditional ▁businesses . ▁in ▁a ▁changing ▁world , ▁governed ▁by ▁rapidly ▁ evo lving ▁international ▁financial ▁regulations , ▁many ▁new ▁opportunities ▁are ▁available ▁within ▁our ▁compliance ▁teams . ▁this ▁is ▁a ▁dynamic ▁profession ▁which , ▁due ▁to ▁regulatory , ▁geopolitical ▁and ▁societal ▁changes , ▁plays ▁an ▁increasingly ▁central ▁role ▁in ▁the ▁crossroads ▁of ▁strategy ▁and ▁the ▁daily ▁actions ▁of ▁the ▁bank ▁and ▁its ▁customers . ” [SEP]\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:1 20:2 21:2 22:2 23:2 24:2 25:2 26:2 27:3 28:3 29:3 30:3 31:3 32:4 33:4 34:4 35:4 36:4 37:5 38:5 39:5 40:6 41:7 42:8 43:9 44:9 45:10 46:11 47:12 48:13 49:14 50:15 51:16 52:17 53:18 54:19 55:19 56:20 57:21 58:22 59:22 60:23 61:24 62:24 63:25 64:26 65:27 66:27 67:28 68:29 69:30 70:30 71:31 72:32 73:33 74:34 75:35 76:36 77:37 78:38 79:39 80:40 81:41 82:41 83:41 84:42 85:43 86:43 87:44 88:45 89:46 90:47 91:47 92:48 93:49 94:50 95:51 96:51 97:51 98:52 99:53 100:54 101:54 102:55 103:56 104:57 105:58 106:59 107:60 108:61 109:62 110:63 111:63 112:64 113:65 114:66 115:67 116:68 117:69 118:69 119:70 120:71 121:72 122:72 123:73 124:74 125:75 126:76 127:76 128:77 129:78 130:79 131:80 132:81 133:82 134:83 135:84 136:85 137:86 138:87 139:88 140:89 141:90 142:91 143:92 144:93 145:94 146:95 147:96 148:96 149:96\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 549 22 17 2554 262 13 5173 153 8911 17 6752 9336 8292 19 17 12 508 180 450 17 21605 7522 12562 106 3213 1812 19 106 20 59 114 4871 364 18 129 87 123 9 208 527 3582 19 29438 6668 19 21 36 8196 19 18 109 7862 23 927 37 18 2247 8681 15009 18 24797 20 18 256 165 23 1505 1812 9 25 24 3110 185 19 12413 37 4556 17 8934 13763 440 638 3768 19 142 109 2361 41 387 364 120 7486 1314 9 52 27 24 6148 7862 59 19 542 22 7582 19 31847 21 26459 1084 19 2254 48 3146 1063 682 25 18 28599 20 2240 21 18 1362 2442 20 18 
1013 21 81 1391 9 407 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - unique_id: 1000000003\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - example_index: 3\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁this ▁large - scale ▁project ▁will ▁further ▁expand ▁in ▁coming ▁years , ▁notably ▁internationally . ▁ gill es ▁des chan el , ▁in ▁charge ▁of ▁academic ▁partnerships ▁at ▁ b n p ▁ pari bas ▁ ci b , ▁observe s ▁that ▁“ this ▁partnership ▁aims ▁to ▁become ▁one ▁of ▁the ▁leading ▁research ▁bodies ▁in ▁this ▁immensely ▁disruptive ▁technology . ▁it ▁will ▁also ▁play ▁a ▁role ▁in ▁transforming ▁the ▁financial ▁sector ▁through ▁publications ▁and ▁major ▁events ▁like ▁ viv a tech . 
” [SEP]\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:1 20:1 21:1 22:2 23:3 24:4 25:5 26:6 27:7 28:8 29:8 30:9 31:10 32:10 33:11 34:11 35:11 36:12 37:12 38:12 39:12 40:13 41:14 42:15 43:16 44:17 45:18 46:19 47:19 48:19 49:19 50:20 51:20 52:20 53:21 54:21 55:21 56:21 57:22 58:22 59:23 60:24 61:24 62:25 63:26 64:27 65:28 66:29 67:30 68:31 69:32 70:33 71:34 72:35 73:36 74:37 75:38 76:39 77:39 78:40 79:41 80:42 81:43 82:44 83:45 84:46 85:47 86:48 87:49 88:50 89:51 90:52 91:53 92:54 93:55 94:56 95:57 96:57 97:57 98:57 99:57 100:57\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 52 392 13 5339 686 53 608 3491 25 834 123 19 7003 8320 9 17 19200 202 9127 5863 530 19 25 1336 20 2550 13501 38 17 508 180 450 17 21605 7522 17 2294 508 19 9051 23 29 221 7567 4164 6471 22 401 65 20 18 895 557 2443 25 52 25170 27920 913 9 36 53 77 354 24 682 25 20775 18 638 1967 135 7134 21 383 1027 115 17 13430 101 4906 9 407 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - unique_id: 1000000004\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - example_index: 4\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁since ▁ jan uary ▁2016 , ▁ b n p ▁ pari bas ▁has ▁offered ▁an ▁excellence ▁program ▁targeting ▁new ▁master ’ s ▁level ▁graduates ▁ ( bac + 5 ) ▁who ▁show ▁high ▁potential . ▁the ▁aid ▁program ▁last s ▁18 ▁months ▁and ▁comprises ▁three ▁assignments ▁of ▁six ▁months ▁each . ▁it ▁serves ▁as ▁a ▁strong ▁career ▁accelerator ▁that ▁enables ▁participants ▁to ▁access ▁high - level ▁management ▁positions ▁at ▁a ▁faster ▁rate . ▁the ▁program ▁allows ▁participants ▁to ▁discover ▁the ▁ b n p ▁ pari bas ▁group ▁and ▁its ▁various ▁entities ▁in ▁ franc e ▁and ▁abroad , ▁build ▁an ▁internal ▁and ▁external ▁network ▁by ▁working ▁on ▁different ▁assignments ▁and ▁receive ▁personalized ▁assistance ▁from ▁a ▁mentor ▁and ▁coaching ▁firm ▁at ▁every ▁step ▁along ▁the ▁way . [SEP]\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:1 20:1 21:1 22:2 23:2 24:3 25:3 26:3 27:3 28:4 29:4 30:4 31:5 32:6 33:7 34:8 35:9 36:10 37:11 38:12 39:12 40:12 41:13 42:14 43:15 44:15 45:15 46:15 47:15 48:15 49:16 50:17 51:18 52:19 53:19 54:20 55:21 56:22 57:23 58:23 59:24 60:25 61:26 62:27 63:28 64:29 65:30 66:31 67:32 68:33 69:33 70:34 71:35 72:36 73:37 74:38 75:39 76:40 77:41 78:42 79:43 80:44 81:45 82:46 83:46 84:46 85:47 86:48 87:49 88:50 89:51 90:52 91:52 92:53 93:54 94:55 95:56 96:57 97:58 98:59 99:60 100:60 101:60 102:60 103:61 104:61 105:61 106:62 107:63 108:64 109:65 110:66 111:67 112:68 113:68 114:68 115:69 116:70 117:70 118:71 119:72 120:73 121:74 122:75 123:76 124:77 125:78 126:79 127:80 128:81 129:82 130:83 131:84 132:85 133:86 134:87 135:88 136:89 137:90 138:91 139:92 140:93 141:94 142:95 143:96 144:97 145:97\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 
122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 196 17 6826 23130 2884 19 17 508 180 450 17 21605 7522 51 1295 48 12110 367 9696 109 2822 165 23 565 11225 17 10 14664 7385 217 11 61 351 227 1220 9 18 1443 367 129 23 501 399 21 12562 139 13814 20 404 399 231 9 36 3697 34 24 737 781 28670 29 8350 3809 22 752 227 13 3181 988 2695 38 24 3477 724 9 18 367 1765 3809 22 5183 18 17 508 180 450 17 21605 7522 256 21 81 807 10312 25 17 12786 93 21 4046 19 1266 48 2854 21 4543 1090 37 481 31 332 13814 21 1217 14857 2489 40 24 8570 21 7553 1338 38 300 1101 411 18 162 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - unique_id: 1000000005\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - example_index: 5\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? 
[SEP] ▁looking ▁to ▁kick start ▁your ▁career ▁with ▁several ▁months ▁of ▁intensive ▁immersion ▁at ▁a ▁major ▁banking ▁group ? ▁that ▁is ▁the ▁opportunity ▁offered ▁to ▁recent ▁graduates ▁by ▁ b n p ▁ pari bas ’ ▁graduate ▁programs . ▁these ▁challenging ▁18 - month ▁programs ▁spent ▁in ▁different ▁operational ▁functions ▁enable ▁participants ▁to ▁expand ▁their ▁banking ▁knowledge , ▁skills ▁and ▁professional ▁networks . ▁learn ▁more ▁about ▁this ▁promising ▁initiative . [SEP]\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:1 20:2 21:2 22:3 23:4 24:5 25:6 26:7 27:8 28:9 29:10 30:11 31:12 32:13 33:14 34:15 35:15 36:16 37:17 38:18 39:19 40:20 41:21 42:22 43:23 44:24 45:25 46:25 47:25 48:25 49:26 50:26 51:26 52:26 53:27 54:28 55:28 56:29 57:30 58:31 59:31 60:31 61:32 62:33 63:34 64:35 65:36 66:37 67:38 68:39 69:40 70:41 71:42 72:43 73:44 74:44 75:45 76:46 77:47 78:48 79:48 80:49 81:50 82:51 83:52 84:53 85:54 86:54\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 589 22 4343 11119 73 781 33 294 399 20 10163 31203 38 24 383 4236 256 82 29 27 18 1394 1295 22 644 11225 37 17 508 180 450 17 21605 7522 165 3868 973 9 166 6215 501 13 2719 973 1188 25 332 6993 3730 4520 3809 22 3491 58 4236 1556 19 1924 21 1030 3986 9 1184 70 75 52 7559 4694 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - unique_id: 1000000006\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - example_index: 6\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁convinced ▁that ▁recent ▁and ▁future ▁graduates ▁represent ▁the ▁future ▁of ▁the ▁group , ▁ b n p ▁ pari bas ▁is ▁counting ▁on ▁their ▁talent ▁to ▁maintain ▁its ▁edge ▁in ▁the ▁market . ▁as ▁it ▁works ▁to ▁build ▁the ▁future ▁of ▁banking , ▁the ▁group ▁is ▁now ▁putting ▁in ▁place ▁ hr ▁measures ▁designed ▁to ▁attract ▁future ▁talent . ▁the ▁graduate ▁programs ▁are ▁one ▁example : ▁these ▁recruiting ▁programs ▁allow ▁selected ▁candidates ▁to ▁join ▁several ▁operational ▁functions ▁for ▁18 ▁months , ▁while ▁benefit ing ▁from ▁personalized ▁ hr ▁support . 
[SEP]\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:1 20:2 21:3 22:4 23:5 24:6 25:7 26:8 27:9 28:10 29:11 30:11 31:12 32:12 33:12 34:12 35:13 36:13 37:13 38:14 39:15 40:16 41:17 42:18 43:19 44:20 45:21 46:22 47:23 48:24 49:25 50:25 51:26 52:27 53:28 54:29 55:30 56:31 57:32 58:33 59:34 60:34 61:35 62:36 63:37 64:38 65:39 66:40 67:41 68:42 69:42 70:43 71:44 72:45 73:46 74:47 75:48 76:48 77:49 78:50 79:51 80:52 81:53 82:54 83:54 84:55 85:56 86:57 87:58 88:59 89:60 90:61 91:62 92:63 93:64 94:65 95:66 96:67 97:68 98:68 99:69 100:70 101:70 102:71 103:72 104:73 105:73 106:74 107:74\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 5503 29 644 21 623 11225 3109 18 623 20 18 256 19 17 508 180 450 17 21605 7522 27 9309 31 58 4738 22 2224 81 2370 25 18 344 9 34 36 1021 22 1266 18 623 20 4236 19 18 256 27 145 2705 25 250 17 3563 1858 1064 22 4964 623 4738 9 18 3868 973 41 65 717 60 166 11512 973 910 2283 2338 22 1673 294 6993 3730 28 501 399 19 171 1887 56 40 14857 17 3563 309 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - unique_id: 1000000007\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - example_index: 7\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁hired ▁immediately ▁through ▁long - term ▁contracts , ▁participants ▁in ▁each ▁pathway ▁complete ▁an ▁ immer sive ▁and ▁personalized ▁curriculum ▁composed ▁of ▁three ▁professional ▁development ▁assignments , ▁each ▁lasting ▁six ▁months . ▁the ▁rotation ▁is ▁co - con struct ed ▁based ▁on ▁the ▁needs ▁of ▁the ▁bank ’ s ▁businesses ▁and ▁the ▁skills ▁or ▁interests ▁of ▁each ▁participant . ▁as ▁full ▁team ▁members , ▁participants ▁quickly ▁gain ▁experience ▁and ▁specific ▁skills ▁by ▁working ▁directly ▁with ▁their ▁peers ▁and ▁senior ▁employees ▁in ▁the ▁entity . ▁as ▁soon ▁as ▁they ▁are ▁hired , ▁participants ▁also ▁become ▁members ▁of ▁a ▁“ train ee ▁class ” ▁to ▁promote ▁joint ▁development , ▁experience - sharing ▁and ▁to ▁help ▁build ▁skills ▁as ▁a ▁group . ▁recruiting ▁programs ▁are ▁open ▁to ▁candidates ▁of ▁all ▁nationalities , ▁though ▁they ▁take ▁place ▁in ▁ franc e ▁ ( with ▁some ▁exceptions ) . 
[SEP]\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:1 20:2 21:3 22:3 23:3 24:4 25:4 26:5 27:6 28:7 29:8 30:9 31:10 32:11 33:11 34:11 35:12 36:13 37:14 38:15 39:16 40:17 41:18 42:19 43:20 44:20 45:21 46:22 47:23 48:24 49:24 50:25 51:26 52:27 53:28 54:28 55:28 56:28 57:28 58:29 59:30 60:31 61:32 62:33 63:34 64:35 65:35 66:35 67:36 68:37 69:38 70:39 71:40 72:41 73:42 74:43 75:44 76:44 77:45 78:46 79:47 80:48 81:48 82:49 83:50 84:51 85:52 86:53 87:54 88:55 89:56 90:57 91:58 92:59 93:60 94:61 95:62 96:63 97:64 98:65 99:66 100:67 101:67 102:68 103:69 104:70 105:71 106:72 107:73 108:73 109:74 110:75 111:76 112:77 113:78 114:79 115:80 116:80 117:80 118:81 119:81 120:82 121:83 122:84 123:85 124:85 125:86 126:86 127:86 128:87 129:88 130:89 131:90 132:91 133:92 134:93 135:94 136:94 137:95 138:96 139:97 140:98 141:99 142:100 143:101 144:102 145:103 146:103 147:104 148:105 149:106 150:107 151:108 152:109 153:109 154:109 155:110 156:110 157:110 158:111 159:112 160:112 161:112\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True 150:True 151:True 152:True 153:True 154:True 155:True 156:True 157:True 158:True 159:True 160:True 161:True\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 4500 1262 135 206 13 1483 4392 19 3809 25 231 13260 1009 48 17 16621 6578 21 14857 8400 4341 20 139 1030 503 13814 19 231 9691 404 399 9 18 9921 27 830 13 1865 5976 68 515 31 18 794 20 18 1013 165 23 1812 21 18 1924 49 2451 20 231 14056 9 34 410 230 340 19 3809 1068 2127 656 21 1240 1924 37 481 1509 33 58 12593 21 1118 1616 25 18 8756 9 34 802 34 63 41 4500 19 3809 77 401 340 20 24 221 11101 2461 1075 407 22 2573 1935 503 19 656 13 13756 21 22 222 1266 1924 34 24 256 9 11512 973 41 433 22 2338 20 71 27321 19 464 63 182 250 25 17 12786 93 17 10 3263 106 13534 11 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - unique_id: 1000000008\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - example_index: 8\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁the ▁programs ▁seek ▁a ▁wide ▁range ▁of ▁candidates . ▁while ▁master ’ s ▁ ( bac + 5 ) ▁graduates ▁in ▁math , ▁finance , ▁economics , ▁science , ▁business , ▁engineering ▁and ▁computer ▁science , ▁showing ▁strong ▁analytical ▁skills ▁and ▁mathematical ▁abilities , ▁can ▁apply ▁for ▁the ▁excellence ▁program , ▁digital ▁and ▁data - oriented ▁candidates ▁are ▁invited ▁to ▁join ▁the ▁digital ▁path . ▁the ▁digital ▁path ▁encourages ▁a ▁passionate ▁mindset ▁and ▁a ▁“ digit al ▁explorer ” ▁mentality , ▁rather ▁than ▁a ▁specific ▁degree . 
▁in ▁this ▁way , ▁the ▁group ▁aims ▁to ▁develop ▁an ▁internal ▁mindset ▁focused ▁on ▁digital ▁innovation , ▁notably ▁by ▁integrating ▁the ▁following ▁candidates : [SEP]\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:1 20:2 21:3 22:4 23:5 24:6 25:7 26:7 27:8 28:9 29:9 30:9 31:10 32:10 33:10 34:10 35:10 36:10 37:11 38:12 39:13 40:13 41:14 42:14 43:15 44:15 45:16 46:16 47:17 48:17 49:18 50:19 51:20 52:21 53:21 54:22 55:23 56:24 57:25 58:26 59:27 60:28 61:28 62:29 63:30 64:31 65:32 66:33 67:34 68:34 69:35 70:36 71:37 72:37 73:37 74:38 75:39 76:40 77:41 78:42 79:43 80:44 81:45 82:45 83:46 84:47 85:48 86:49 87:50 88:51 89:52 90:53 91:54 92:55 93:55 94:55 95:56 96:56 97:57 98:57 99:58 100:59 101:60 102:61 103:62 104:62 105:63 106:64 107:65 108:65 109:66 110:67 111:68 112:69 113:70 114:71 115:72 116:73 117:74 118:75 119:76 120:77 121:77 122:78 123:79 124:80 125:81 126:82 127:83 128:83\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 18 973 2304 24 1213 944 20 2338 9 171 2822 165 23 17 10 14664 7385 217 11 11225 25 8566 19 2761 19 10533 19 1767 19 264 19 3814 21 920 1767 19 2343 737 19799 1924 21 12956 8621 19 64 2285 28 18 12110 367 19 2247 21 527 13 6754 2338 41 3687 22 1673 18 2247 2606 9 18 2247 2606 13680 24 11037 26357 21 24 221 9235 212 18201 407 23149 19 870 100 24 1240 1693 9 25 52 162 19 18 256 6471 22 1627 48 2854 26357 2661 31 2247 7767 19 7003 37 21724 18 405 2338 60 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - unique_id: 1000000009\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - example_index: 9\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁for ▁all ▁programs , ▁initiative , ▁determination ▁and ▁curiosity ▁are ▁essential ▁qualities . ▁participants ▁should ▁also ▁be ▁committed ▁team ▁players ▁eager ▁to ▁tackle ▁collaborative ▁work . ▁candidates ▁need ▁not ▁have ▁a ▁specific ▁end ▁goal ▁in ▁mind ▁for ▁the ▁program — based ▁on ▁their ▁preferences ▁and ▁the ▁opportunities ▁available ▁with ▁each ▁business , ▁participants ▁can ▁co - con struct ▁their ▁pathway ▁with ▁ hr ▁teams ▁as ▁they ▁progress ▁through ▁the ▁program . 
[SEP]\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:1 20:2 21:2 22:3 23:3 24:4 25:5 26:6 27:7 28:8 29:9 30:9 31:10 32:11 33:12 34:13 35:14 36:15 37:16 38:17 39:18 40:19 41:20 42:21 43:21 44:22 45:23 46:24 47:25 48:26 49:27 50:28 51:29 52:30 53:31 54:32 55:33 56:34 57:34 58:34 59:35 60:36 61:37 62:38 63:39 64:40 65:41 66:42 67:43 68:44 69:44 70:45 71:46 72:47 73:47 74:47 75:47 76:48 77:49 78:50 79:51 80:51 81:52 82:53 83:54 84:55 85:56 86:57 87:58 88:58\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 28 71 973 19 4694 19 7775 21 15844 41 3018 10855 9 3809 170 77 39 2362 230 793 7456 22 6204 13295 154 9 2338 214 50 47 24 1240 239 935 25 823 28 18 367 1559 716 31 58 14319 21 18 2361 387 33 231 264 19 3809 64 830 13 1865 5976 58 13260 33 17 3563 1314 34 63 1915 135 18 367 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - unique_id: 1000000010\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - example_index: 10\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/18/2019 10:19:39 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁ b n p ▁ pari bas ▁recruits ▁new ▁graduates ▁to ▁fulfill ▁assignments ▁lasting ▁up ▁to ▁16 ▁months ▁by ▁joining ▁the ▁group ▁through ▁one ▁of ▁its ▁international ▁subsidiaries . ▁what ▁types ▁of ▁positions ▁are ▁available ▁through ▁the ▁ vie ▁program ? ▁what ▁destinations ▁does ▁it ▁offer ? ▁what ▁types ▁of ▁applicants ▁do ▁we ▁look ▁for ? ▁let ’ s ▁take ▁a ▁closer ▁look ▁at ▁the ▁international ▁corporate ▁volunteer ▁program , ▁which ▁provides ▁a ▁chance ▁to ▁launch ▁your ▁international ▁career ▁in ▁a ▁position ▁with ▁real ▁ responsibilities . [SEP]\n", + "07/18/2019 10:19:40 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:0 20:0 21:0 22:1 23:1 24:1 25:2 26:3 27:4 28:5 29:6 30:7 31:8 32:9 33:10 34:11 35:12 36:13 37:14 38:15 39:16 40:17 41:18 42:19 43:20 44:21 45:22 46:22 47:23 48:24 49:25 50:26 51:27 52:28 53:29 54:30 55:31 56:31 57:32 58:32 59:33 60:34 61:35 62:36 63:37 64:37 65:38 66:39 67:40 68:41 69:42 70:43 71:44 72:45 73:45 74:46 75:46 76:46 77:47 78:48 79:49 80:50 81:51 82:52 83:53 84:54 85:55 86:56 87:56 88:57 89:58 90:59 91:60 92:61 93:62 94:63 95:64 96:65 97:66 98:67 99:68 100:69 101:70 102:71 103:71 104:71\n", + "07/18/2019 10:19:40 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True\n", + "07/18/2019 10:19:40 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 17 508 180 450 17 21605 7522 17954 109 11225 22 9652 13814 9691 76 22 504 399 37 4391 18 256 135 65 20 81 440 19328 9 113 1971 20 2695 41 387 135 18 17 9209 367 82 113 11835 358 36 670 82 113 1971 20 10914 112 80 338 28 82 618 165 23 182 24 2492 338 38 18 440 2348 6134 367 19 59 1176 24 1116 22 2498 73 440 781 25 24 740 33 525 17 7517 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:40 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:40 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/18/2019 10:19:40 - INFO - cdqa.reader.reader_sklearn - Saving features into cached file cached_dev_xlnet-base-cased_384\n", + "07/18/2019 10:19:40 - INFO - cdqa.reader.reader_sklearn - ***** Running evaluation *****\n", + "07/18/2019 10:19:40 - INFO - cdqa.reader.reader_sklearn - Num examples = 11\n", + "07/18/2019 10:19:40 - INFO - cdqa.reader.reader_sklearn - Batch size = 8\n", + "Evaluating: 100%|██████████| 2/2 [00:31<00:00, 18.72s/it]\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Writing predictions to: ./predictions_.json\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'business schools or engineering schools. the group serves as a committed partner of the academic' in 'business schools or engineering schools. 
The Group serves as a committed partner of the academic'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'talent, whether' in 'talent, whether'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'talent, whether it be universities, business schools' in 'talent, whether it be universities, business schools'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'talent, whether it be universities, business schools' in 'talent, whether it be universities, business schools'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'future talent, the bnp paribas group relies on its campus management team, which ensures that the' in 'future talent, the BNP Paribas Group relies on its Campus Management team, which ensures that the'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'future talent, the' in 'future talent, the BNP'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'candidates or future talent, the bnp paribas group relies on its campus management team, which ensures that the' in 'candidates or future talent, the BNP Paribas Group relies on its Campus Management team, which ensures that the'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'paribas group relies on its campus management team, which ensures that the bank’s initiatives carried out with certain schools in france' in 'Paribas Group relies on its Campus Management team, which ensures that the bank’s initiatives carried out with certain schools in France'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'dominique criscuolo, \"bnp' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'dominique criscuolo, \"bnp' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'dominique criscuolo, \"bnp' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'dominique criscuolo, \"bnp' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'dominique criscuolo, \"bnp' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'inique criscuolo, \"bnp pari' in 'Jean-Dominique Criscuolo, \"BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'gilles deschanel, in charge of academic partnerships at bnp' in 'Gilles Deschanel, in charge of academic partnerships at BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'gilles deschanel, in charge of academic partnerships at bnp' in 'Gilles Deschanel, in charge of academic partnerships at BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'gilles deschanel, in charge of academic partnerships at bnp' in 'Gilles Deschanel, in charge of academic partnerships at BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'gilles deschanel, in charge of academic partnerships at bnp' in 'Gilles Deschanel, in charge of academic partnerships at BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: ', notably internationally. gilles deschanel, in charge of academic partnerships at bnp paribas' in 'years, notably internationally. Gilles Deschanel, in charge of academic partnerships at BNP Paribas CIB,'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: ', notably internationally. gilles deschanel, in charge of academic partnerships at bnp paribas' in 'years, notably internationally. Gilles Deschanel, in charge of academic partnerships at BNP Paribas CIB,'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a faster rate. the program allows participants to discover the bnp pari' in 'a faster rate. The program allows participants to discover the BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a faster rate. the program allows participants to discover the bnp' in 'a faster rate. The program allows participants to discover the BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a faster rate. the program allows participants to discover the bnp' in 'a faster rate. The program allows participants to discover the BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a faster rate. the program allows participants to discover the bnp pari' in 'a faster rate. The program allows participants to discover the BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a faster rate. the program allows participants to discover the bnp paribas' in 'a faster rate. The program allows participants to discover the BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'at a faster rate. the program allows participants to discover the b' in 'at a faster rate. The program allows participants to discover the BNP'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'at a faster rate. the program allows participants to discover the b' in 'at a faster rate. The program allows participants to discover the BNP'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'at a faster rate. the program allows participants to discover the bnp paribas' in 'at a faster rate. The program allows participants to discover the BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'january 2016, bnp paribas' in 'January 2016, BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'january 2016, bnp paribas' in 'January 2016, BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'january 2016, b' in 'January 2016, BNP'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'january 2016, bnp pari' in 'January 2016, BNP Paribas'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'career with several months of intensive immersion at a major banking group? that is the opportunity offered to recent graduates by bnp' in 'career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'career with several months of intensive immersion at a major banking group? that is the opportunity offered to recent graduates by bnp' in 'career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'career with several months of intensive immersion at a major banking group? that is the opportunity offered to recent graduates by bnp' in 'career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'career with several months of intensive immersion at a major banking group? that is the opportunity offered to recent graduates by bnp paribas’' in 'career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'career with several months of intensive immersion at a major banking group? that is the opportunity offered to recent graduates by bnp paribas’' in 'career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'that is the opportunity offered to recent graduates by bnp paribas’' in 'That is the opportunity offered to recent graduates by BNP Paribas’'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a major banking group? that is the opportunity offered to recent graduates by bnp paribas’' in 'a major banking group? That is the opportunity offered to recent graduates by BNP Paribas’'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a major banking group? that' in 'a major banking group? That'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'that is the opportunity offered to recent graduates by bnp paribas’' in 'That is the opportunity offered to recent graduates by BNP Paribas’'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'a major banking group?' in 'a major banking group?'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'their talent to maintain its edge in the market. as it works to build the future of' in 'their talent to maintain its edge in the market. As it works to build the future of'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'the entity. as soon as they are hired, participants also become members of a “train' in 'the entity. As soon as they are hired, participants also become members of a “trainee'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'science, business' in 'science, business,'\n",
+ "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'excellence program, digital and data-oriented candidates are invited to join the digital path. the digital path encourages a passionate mindset and a “digit' in 'Excellence Program, digital and data-oriented candidates are invited to join the Digital Path. 
The Digital Path encourages a passionate mindset and a “digital'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'excellence program, digital and data-oriented candidates are invited to join the digital path. the digital path encourages a passionate mindset and a “' in 'Excellence Program, digital and data-oriented candidates are invited to join the Digital Path. The Digital Path encourages a passionate mindset and a “digital'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: ', determination and curiosity are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have' in 'initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: ', determination and curiosity are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have' in 'initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'and curiosity are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have' in 'and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'and curiosity are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have' in 'and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'participants should also be committed team players eager to tackle collaborative work. candidates need not have a specific end goal in mind for the' in 'Participants should also be committed team players eager to tackle collaborative work. Candidates need not have a specific end goal in mind for the'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'participants should also be committed team players eager to tackle collaborative work. candidates need not' in 'Participants should also be committed team players eager to tackle collaborative work. Candidates need not'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: ', initiative, determination and curiosity are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not' in 'programs, initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: ', initiative, determination and curiosity are essential qualities. participants should' in 'programs, initiative, determination and curiosity are essential qualities. 
Participants should'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: ', initiative, determination and curiosity are essential qualities. participants should' in 'programs, initiative, determination and curiosity are essential qualities. Participants should'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates' in 'are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have' in 'are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'are essential qualities. participants should also be committed team players eager to tackle collaborative work. candidates need not have a specific end goal in mind for the program' in 'are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have a specific end goal in mind for the program—based'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'let’s take a' in 'Let’s take a'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'let’s take a' in 'Let’s take a'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'let’s take a' in 'Let’s take a'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'let’s take a' in 'Let’s take a'\n", + "07/18/2019 10:20:11 - INFO - cdqa.reader.utils_squad - Unable to find text: 'up to 16 months by joining the group through one of its international subsidiaries. what' in 'up to 16 months by joining the Group through one of its international subsidiaries. What'\n" ], "name": "stderr" } ] }, - { - "cell_type": "code", - "metadata": { - "id": "85HVKxOJYHuN", - "colab_type": "code", - "outputId": "4d7c2944-8570-41b0-e874-6b4722722daf", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - } - }, - "source": [ - "out_eval, final_prediction = reader.predict(X='')" - ], - "execution_count": 20, - "outputs": [ - { - "output_type": "stream", - "text": [ - "07/18/2019 09:00:33 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at [{'title': 'BNP Paribas’ commitment to universities and schools', 'paragraphs': [{'context': 'BNP Paribas has long maintained a strong relationship with the academic world and target schools in order to attract its future talent, whether it be universities, business schools or engineering schools. The Group serves as a committed partner of the academic world. 
It also plays a role in curriculum planning by updating coursework so that it matches actual business needs as closely as possible.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '100e1c8e-69f7-4f4b-9f3c-936f33bcc71e'}]}, {'context': 'To promote the bank’s businesses among students and recruit high-potential candidates or future talent, the BNP Paribas Group relies on its Campus Management team, which ensures that the bank’s initiatives carried out with certain schools in France match the needs of its businesses, departments and subsidiaries. In other words, the role of Campus Management is to develop quality relationships with students and promote the Group’s businesses. In the words of Jean-Dominique Criscuolo, Manager of Partnerships and Relations with Schools and Universities, “We strongly believe that our Academic Relations play a growing role in transforming our Group.”', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': 'cfac01a5-98eb-4d76-8066-adb63e24751c'}]}, {'context': 'According to Jean-Dominique Criscuolo, \"BNP Paribas comprises some 300 businesses, some of which only emerged within the last two years. Including data scientists, agile coaches, and IT inspectors, the new professions created by the digital transformation broaden the palette of the Group’s traditional businesses. In a changing world, governed by rapidly evolving international financial regulations, many new opportunities are available within our Compliance teams. This is a dynamic profession which, due to regulatory, geopolitical and societal changes, plays an increasingly central role in the crossroads of strategy and the daily actions of the bank and its customers.”', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': 'dbba50ec-f907-46ed-9f8a-e106b17585f5'}]}, {'context': 'This large-scale project will further expand in coming years, notably internationally. Gilles Deschanel, in charge of academic partnerships at BNP Paribas CIB, observes that “this partnership aims to become one of the leading research bodies in this immensely disruptive technology. It will also play a role in transforming the financial sector through publications and major events like VivaTech.”', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '4cbf8827-c3a9-42c2-9bd2-fd3ca38fc2e1'}]}, {'context': 'Since January 2016, BNP Paribas has offered an Excellence Program targeting new Master’s level graduates (BAC+5) who show high potential. The aid program lasts 18 months and comprises three assignments of six months each. It serves as a strong career accelerator that enables participants to access high-level management positions at a faster rate. The program allows participants to discover the BNP Paribas Group and its various entities in France and abroad, build an internal and external network by working on different assignments and receive personalized assistance from a mentor and coaching firm at every step along the way.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '9b1e4395-6554-4fba-bd80-cd9639a687f4'}]}]}, {'title': 'BNP Paribas Graduate Programs in France', 'paragraphs': [{'context': 'Looking to kickstart your career with several months of intensive immersion at a major banking group? 
That is the opportunity offered to recent graduates by BNP Paribas’ Graduate Programs. These challenging 18-month programs spent in different operational functions enable participants to expand their banking knowledge, skills and professional networks. Learn more about this promising initiative.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '2e266853-ed16-4fce-9701-a5d5c7005b80'}]}, {'context': 'Convinced that recent and future graduates represent the future of the Group, BNP Paribas is counting on their talent to maintain its edge in the market. As it works to build the future of banking, the Group is now putting in place HR measures designed to attract future talent. The Graduate Programs are one example: these recruiting programs allow selected candidates to join several operational functions for 18 months, while benefiting from personalized HR support.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '56d2240c-0d26-4534-bf99-87aecec38523'}]}, {'context': 'Hired immediately through long-term contracts, participants in each pathway complete an immersive and personalized curriculum composed of three professional development assignments, each lasting six months. The rotation is co-constructed based on the needs of the bank’s businesses and the skills or interests of each participant. As full team members, participants quickly gain experience and specific skills by working directly with their peers and senior employees in the entity. As soon as they are hired, participants also become members of a “trainee class” to promote joint development, experience-sharing and to help build skills as a group. Recruiting programs are open to candidates of all nationalities, though they take place in France (with some exceptions).', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '719ec8c6-fd2b-4ee5-b4b5-9c77b6f7e27c'}]}, {'context': 'The programs seek a wide range of candidates. While Master’s (Bac+5) graduates in math, finance, economics, science, business, engineering and computer science, showing strong analytical skills and mathematical abilities, can apply for the Excellence Program, digital and data-oriented candidates are invited to join the Digital Path. The Digital Path encourages a passionate mindset and a “digital explorer” mentality, rather than a specific degree. In this way, the Group aims to develop an internal mindset focused on digital innovation, notably by integrating the following candidates:', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '533f1412-6289-4a83-bb96-efbae2ba508c'}]}, {'context': 'For all programs, initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. 
Candidates need not have a specific end goal in mind for the program—based on their preferences and the opportunities available with each business, participants can co-construct their pathway with HR teams as they progress through the program.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '062d4586-93bf-4ab3-b03f-f15aa89f39e8'}]}]}, {'title': 'Making the most of your VIE!', 'paragraphs': [{'context': 'BNP Paribas recruits new graduates to fulfill assignments lasting up to 16 months by joining the Group through one of its international subsidiaries. What types of positions are available through the VIE program? What destinations does it offer? What types of applicants do we look for? Let’s take a closer look at the international corporate volunteer program, which provides a chance to launch your international career in a position with real responsibilities.', 'qas': [{'answers': [], 'question': 'Since when does the Excellence Program of BNP Paribas exist?', 'id': '60844039-fbfb-48ba-bd8a-824c0fc36935'}]}]}]\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - example_index: 0\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁ b n p ▁ pari bas ▁has ▁long ▁maintained ▁a ▁strong ▁relationship ▁with ▁the ▁academic ▁world ▁and ▁target ▁schools ▁in ▁order ▁to ▁attract ▁its ▁future ▁talent , ▁whether ▁it ▁be ▁universities , ▁business ▁schools ▁or ▁engineering ▁schools . ▁the ▁group ▁serves ▁as ▁a ▁committed ▁partner ▁of ▁the ▁academic ▁world . ▁it ▁also ▁plays ▁a ▁role ▁in ▁curriculum ▁planning ▁by ▁updating ▁course work ▁so ▁that ▁it ▁matches ▁actual ▁business ▁needs ▁as ▁closely ▁as ▁possible . 
[SEP]\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:0 20:0 21:0 22:1 23:1 24:1 25:2 26:3 27:4 28:5 29:6 30:7 31:8 32:9 33:10 34:11 35:12 36:13 37:14 38:15 39:16 40:17 41:18 42:19 43:20 44:21 45:21 46:22 47:23 48:24 49:25 50:25 51:26 52:27 53:28 54:29 55:30 56:30 57:31 58:32 59:33 60:34 61:35 62:36 63:37 64:38 65:39 66:40 67:41 68:41 69:42 70:43 71:44 72:45 73:46 74:47 75:48 76:49 77:50 78:51 79:52 80:52 81:53 82:54 83:55 84:56 85:57 86:58 87:59 88:60 89:61 90:62 91:63 92:63\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - input_ids: 0 196 90 358 18 12110 367 20 17 508 180 450 17 21605 7522 3438 82 0 17 508 180 450 17 21605 7522 51 206 4109 24 737 1498 33 18 2550 185 21 1983 1326 25 374 22 4964 81 623 4738 19 548 36 39 5536 19 264 1326 49 3814 1326 9 18 256 3697 34 24 2362 2229 20 18 2550 185 9 36 77 2254 24 682 25 8400 1777 37 19520 477 3552 102 29 36 2466 2746 264 794 34 3126 34 498 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - *** Example ***\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - unique_id: 1000000001\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - example_index: 1\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁ b n p ▁ pari bas ▁exist ? [SEP] ▁to ▁promote ▁the ▁bank ’ s ▁businesses ▁among ▁students ▁and ▁recruit ▁high - pot ential ▁candidates ▁or ▁future ▁talent , ▁the ▁ b n p ▁ pari bas ▁group ▁relies ▁on ▁its ▁campus ▁management ▁team , ▁which ▁ensures ▁that ▁the ▁bank ’ s ▁initiatives ▁carried ▁out ▁with ▁certain ▁schools ▁in ▁ franc e ▁match ▁the ▁needs ▁of ▁its ▁businesses , ▁departments ▁and ▁subsidiaries . ▁in ▁other ▁words , ▁the ▁role ▁of ▁campus ▁management ▁is ▁to ▁develop ▁quality ▁relationships ▁with ▁students ▁and ▁promote ▁the ▁group ’ s ▁businesses . ▁in ▁the ▁words ▁of ▁ je an - dom in ique ▁ cri scu olo , ▁manager ▁of ▁partnerships ▁and ▁relations ▁with ▁schools ▁and ▁universities , ▁“ we ▁strongly ▁believe ▁that ▁our ▁academic ▁relations ▁play ▁a ▁growing ▁role ▁in ▁transforming ▁our ▁group . ” [SEP]\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 18:0 19:1 20:2 21:3 22:3 23:3 24:4 25:5 26:6 27:7 28:8 29:9 30:9 31:9 32:9 33:10 34:11 35:12 36:13 37:13 38:14 39:15 40:15 41:15 42:15 43:16 44:16 45:16 46:17 47:18 48:19 49:20 50:21 51:22 52:23 53:23 54:24 55:25 56:26 57:27 58:28 59:28 60:28 61:29 62:30 63:31 64:32 65:33 66:34 67:35 68:36 69:36 70:36 71:37 72:38 73:39 74:40 75:41 76:42 77:42 78:43 79:44 80:45 81:45 82:46 83:47 84:48 85:48 86:49 87:50 88:51 89:52 90:53 91:54 92:55 93:56 94:57 95:58 96:59 97:60 98:61 99:62 100:63 101:64 102:64 103:64 104:65 105:65 106:66 107:67 108:68 109:69 110:70 111:70 112:70 113:70 114:70 115:70 116:70 117:71 118:71 119:71 120:71 121:71 122:72 123:73 124:74 125:75 126:76 127:77 128:78 129:79 130:80 131:80 132:81 133:81 134:82 135:83 136:84 137:85 138:86 139:87 140:88 141:89 142:90 143:91 144:92 145:93 146:94 147:95 148:95 149:95\n", - "07/18/2019 09:00:33 - INFO - cdqa.reader.utils_squad - token_is_max_context: 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 
131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True 143:True 144:True 145:True 146:True 147:True 148:True 149:True\n",
- "[... remaining feature logs elided: the zero-padded input_ids, input_mask and segment_ids arrays for this example, followed by the same block of logs (unique_id, example_index, doc_span_index, tokens, token_to_orig_map, token_is_max_context, input_ids, input_mask, segment_ids) for examples 2 through 10, unique_ids 1000000002 to 1000000010. Every sequence is the tokenized pair [CLS] question [SEP] paragraph [SEP], padded with zeros to max_seq_length = 384; the question is '▁since ▁when ▁does ▁the ▁excellence ▁program ▁of ▁bnp ▁paribas ▁exist ?' throughout, and the paragraphs describe the BNP Paribas excellence and graduate programs, the VIE program and the Group's campus partnerships ...]\n",
- "07/18/2019 09:00:33 - INFO - cdqa.reader.reader_sklearn - Saving features into cached file cached_dev_xlnet-base-cased_384\n",
- "07/18/2019 09:00:33 - INFO - cdqa.reader.reader_sklearn - ***** Running evaluation *****\n",
- "07/18/2019 09:00:33 - INFO - cdqa.reader.reader_sklearn - Num examples = 11\n",
- "07/18/2019 09:00:33 - INFO - cdqa.reader.reader_sklearn - Batch size = 8\n",
- "Evaluating: 100%|██████████| 2/2 [00:30<00:00, 17.91s/it]\n",
- "07/18/2019 09:01:03 - INFO - cdqa.reader.utils_squad - Writing predictions to: ./predictions_.json\n",
- "07/18/2019 09:01:03 - INFO - cdqa.reader.utils_squad - Unable to find text: 'business schools or engineering schools. the group serves as a committed partner of the academic' in 'business schools or engineering schools. The Group serves as a committed partner of the academic'\n",
- "[... several dozen similar 'Unable to find text' warnings elided: each one pairs a lowercased predicted span with the cased source paragraph, e.g. 'january 2016, bnp paribas' vs 'January 2016, BNP Paribas', 'gilles deschanel, in charge of academic partnerships at bnp' vs 'Gilles Deschanel, in charge of academic partnerships at BNP Paribas', 'let's take a' vs 'Let's take a', so the case-sensitive lookup fails for every prediction ...]\n",
- "07/18/2019 09:01:03 - INFO - cdqa.reader.utils_squad - Unable to find text: 'up to 16 months by joining the group through one of its international subsidiaries. what' in 'up to 16 months by joining the Group through one of its international subsidiaries. 
What'\n" - ], - "name": "stderr" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "J_72WSnDlAxn", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 734 - }, - "outputId": "59e5dbca-eb62-4ace-d6fd-a52a9ff8e2d2" - }, - "source": [ - "!ls -la" - ], - "execution_count": 21, - "outputs": [ - { - "output_type": "stream", - "text": [ - "total 546040\n", - "drwxr-xr-x 10 root root 4096 Jul 18 09:00 .\n", - "drwxr-xr-x 1 root root 4096 Jul 18 08:52 ..\n", - "-rw-r--r-- 1 root root 2 Jul 18 08:59 added_tokens.json\n", - "-rw-r--r-- 1 root root 815 Jul 18 08:52 api.py\n", - "-rw-r--r-- 1 root root 22 Jul 18 08:52 apt.txt\n", - "-rw-r--r-- 1 root root 63330 Jul 18 09:00 cached_dev_xlnet-base-cased_384\n", - "-rw-r--r-- 1 root root 101136 Jul 18 08:53 cached_train_xlnet-base-cased_384\n", - "drwxr-xr-x 7 root root 4096 Jul 18 08:52 cdqa\n", - "drwxr-xr-x 2 root root 4096 Jul 18 08:52 cdqa.egg-info\n", - "-rw-r--r-- 1 root root 641 Jul 18 08:59 config.json\n", - "drwxr-xr-x 3 root root 4096 Jul 18 09:00 data\n", - "-rw-r--r-- 1 root root 4854279 Jul 18 08:52 dev-v1.1.json\n", - "-rw-r--r-- 1 root root 4370528 Jul 18 08:52 dev-v2.0.json\n", - "-rw-r--r-- 1 root root 8786 Jul 18 08:53 dev-v2.0-small.json\n", - "-rw-r--r-- 1 root root 1452 Jul 18 08:52 download.py\n", - "drwxr-xr-x 2 root root 4096 Jul 18 08:52 examples\n", - "drwxr-xr-x 8 root root 4096 Jul 18 08:52 .git\n", - "drwxr-xr-x 3 root root 4096 Jul 18 08:52 .github\n", - "-rw-r--r-- 1 root root 1375 Jul 18 08:52 .gitignore\n", - "-rw-r--r-- 1 root root 11356 Jul 18 08:52 LICENSE\n", - "-rw-r--r-- 1 root root 40 Jul 18 08:52 MANIFEST.in\n", - "-rw-r--r-- 1 root root 12357 Jul 18 09:01 nbest_predictions_.json\n", - "-rw-r--r-- 1 root root 723 Jul 18 09:01 null_odds_.json\n", - "-rw-r--r-- 1 root root 1349 Jul 18 09:01 predictions_.json\n", - "-rw-r--r-- 1 root root 476371987 Jul 18 08:59 pytorch_model.bin\n", - "-rw-r--r-- 1 root root 12855 Jul 18 08:52 README.md\n", - "-rw-r--r-- 1 root root 129 Jul 18 08:52 requirements.txt\n", - "drwxr-xr-x 3 root root 4096 Jul 18 08:53 runs\n", - "-rw-r--r-- 1 root root 727 Jul 18 08:52 setup.py\n", - "-rw-r--r-- 1 root root 202 Jul 18 08:59 special_tokens_map.json\n", - "-rw-r--r-- 1 root root 798011 Jul 18 08:59 spiece.model\n", - "drwxr-xr-x 2 root root 4096 Jul 18 08:52 tests\n", - "-rw-r--r-- 1 root root 1244 Jul 18 08:59 training_args.bin\n", - "-rw-r--r-- 1 root root 30288272 Jul 18 08:52 train-v1.1.json\n", - "-rw-r--r-- 1 root root 42123633 Jul 18 08:52 train-v2.0.json\n", - "-rw-r--r-- 1 root root 312 Jul 18 08:52 .travis.yml\n" - ], - "name": "stdout" - } - ] - }, { "cell_type": "code", "metadata": { @@ -1694,7 +1123,7 @@ "base_uri": "https://localhost:8080/", "height": 95 }, - "outputId": "fbc84da8-167e-4411-eb28-888fc56ee6f6" + "outputId": "b6cbb55f-5386-48ae-f476-c1088f162fc0" }, "source": [ "# print('query: {}'.format(query))\n", @@ -1702,7 +1131,7 @@ "print('title: {}'.format(final_prediction[1]))\n", "print('paragraph: {}'.format(final_prediction[2]))" ], - "execution_count": 22, + "execution_count": 14, "outputs": [ { "output_type": "stream", @@ -1718,100 +1147,116 @@ { "cell_type": "code", "metadata": { - "id": "0GObRQ1rJs-K", + "id": "qRwGqhHjXPeb", "colab_type": "code", + "outputId": "191b1270-3bbe-44c0-b0e1-a48042079a4f", "colab": { "base_uri": "https://localhost:8080/", - "height": 734 - }, - "outputId": "e69849f3-f520-4116-b8bf-846705f4bb9f" + "height": 36 + } }, "source": [ - "!ls -la" + "# save GPU version locally\n", + 
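"# NOTE: joblib serializes the whole Reader object, fine-tuned torch weights included,\n",
+      "# so each dump below is roughly 450 MB (see the ls listing further down); moving\n",
+      "# the model to CPU before the second dump means that pickle can later be\n",
+      "# reloaded on a machine without CUDA.\n",
+      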
"joblib.dump(reader, os.path.join(reader.output_dir, 'xlnet_qa_vGPU.joblib'))\n", + "\n", + "# send current reader model to CPU\n", + "reader.model.to('cpu')\n", + "reader.device = torch.device('cpu')\n", + "\n", + "# save CPU it locally\n", + "joblib.dump(reader, os.path.join(reader.output_dir, 'bert_qa_vCPU.joblib'))" ], - "execution_count": 23, + "execution_count": 16, "outputs": [ { - "output_type": "stream", - "text": [ - "total 546040\n", - "drwxr-xr-x 10 root root 4096 Jul 18 09:00 .\n", - "drwxr-xr-x 1 root root 4096 Jul 18 08:52 ..\n", - "-rw-r--r-- 1 root root 2 Jul 18 08:59 added_tokens.json\n", - "-rw-r--r-- 1 root root 815 Jul 18 08:52 api.py\n", - "-rw-r--r-- 1 root root 22 Jul 18 08:52 apt.txt\n", - "-rw-r--r-- 1 root root 63330 Jul 18 09:00 cached_dev_xlnet-base-cased_384\n", - "-rw-r--r-- 1 root root 101136 Jul 18 08:53 cached_train_xlnet-base-cased_384\n", - "drwxr-xr-x 7 root root 4096 Jul 18 08:52 cdqa\n", - "drwxr-xr-x 2 root root 4096 Jul 18 08:52 cdqa.egg-info\n", - "-rw-r--r-- 1 root root 641 Jul 18 08:59 config.json\n", - "drwxr-xr-x 3 root root 4096 Jul 18 09:00 data\n", - "-rw-r--r-- 1 root root 4854279 Jul 18 08:52 dev-v1.1.json\n", - "-rw-r--r-- 1 root root 4370528 Jul 18 08:52 dev-v2.0.json\n", - "-rw-r--r-- 1 root root 8786 Jul 18 08:53 dev-v2.0-small.json\n", - "-rw-r--r-- 1 root root 1452 Jul 18 08:52 download.py\n", - "drwxr-xr-x 2 root root 4096 Jul 18 08:52 examples\n", - "drwxr-xr-x 8 root root 4096 Jul 18 08:52 .git\n", - "drwxr-xr-x 3 root root 4096 Jul 18 08:52 .github\n", - "-rw-r--r-- 1 root root 1375 Jul 18 08:52 .gitignore\n", - "-rw-r--r-- 1 root root 11356 Jul 18 08:52 LICENSE\n", - "-rw-r--r-- 1 root root 40 Jul 18 08:52 MANIFEST.in\n", - "-rw-r--r-- 1 root root 12357 Jul 18 09:01 nbest_predictions_.json\n", - "-rw-r--r-- 1 root root 723 Jul 18 09:01 null_odds_.json\n", - "-rw-r--r-- 1 root root 1349 Jul 18 09:01 predictions_.json\n", - "-rw-r--r-- 1 root root 476371987 Jul 18 08:59 pytorch_model.bin\n", - "-rw-r--r-- 1 root root 12855 Jul 18 08:52 README.md\n", - "-rw-r--r-- 1 root root 129 Jul 18 08:52 requirements.txt\n", - "drwxr-xr-x 3 root root 4096 Jul 18 08:53 runs\n", - "-rw-r--r-- 1 root root 727 Jul 18 08:52 setup.py\n", - "-rw-r--r-- 1 root root 202 Jul 18 08:59 special_tokens_map.json\n", - "-rw-r--r-- 1 root root 798011 Jul 18 08:59 spiece.model\n", - "drwxr-xr-x 2 root root 4096 Jul 18 08:52 tests\n", - "-rw-r--r-- 1 root root 1244 Jul 18 08:59 training_args.bin\n", - "-rw-r--r-- 1 root root 30288272 Jul 18 08:52 train-v1.1.json\n", - "-rw-r--r-- 1 root root 42123633 Jul 18 08:52 train-v2.0.json\n", - "-rw-r--r-- 1 root root 312 Jul 18 08:52 .travis.yml\n" - ], - "name": "stdout" + "output_type": "execute_result", + "data": { + "text/plain": [ + "['./bert_qa_vCPU.joblib']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 16 } ] }, { "cell_type": "code", "metadata": { - "id": "ikxxSgPPLP9C", + "id": "IhHmo11Cm2lI", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 289 + "height": 772 }, - "outputId": "7da9448f-9eda-4a6b-8c9c-ce91a0274ae9" + "outputId": "def065db-e2c4-4acb-93e2-3bd7cfb745c8" }, "source": [ - "!cat predictions_.json" + "!ls -la" ], - "execution_count": 24, + "execution_count": 17, "outputs": [ { "output_type": "stream", "text": [ - "{\n", - " \"100e1c8e-69f7-4f4b-9f3c-936f33bcc71e\": \"business schools\",\n", - " \"cfac01a5-98eb-4d76-8066-adb63e24751c\": \"future talent, the BNP Paribas Group relies on its Campus Management team, which ensures that the\",\n", - " 
\"dbba50ec-f907-46ed-9f8a-e106b17585f5\": \"Jean-Dominique Criscuolo, \\\"BNP Paribas\",\n", - " \"4cbf8827-c3a9-42c2-9bd2-fd3ca38fc2e1\": \"Gilles Deschanel, in charge of academic partnerships at BNP Paribas\",\n", - " \"9b1e4395-6554-4fba-bd80-cd9639a687f4\": \"a faster rate. The program allows participants to discover the BNP Paribas\",\n", - " \"2e266853-ed16-4fce-9701-a5d5c7005b80\": \"career with several months of intensive immersion at a major banking group? That is the opportunity offered to recent graduates by BNP Paribas\\u2019\",\n", - " \"56d2240c-0d26-4534-bf99-87aecec38523\": \"their talent to maintain its edge in the market. As it works to build the future of\",\n", - " \"719ec8c6-fd2b-4ee5-b4b5-9c77b6f7e27c\": \"the entity. As soon as they are hired, participants also become members of a \\u201ctrainee\",\n", - " \"533f1412-6289-4a83-bb96-efbae2ba508c\": \", economics\",\n", - " \"062d4586-93bf-4ab3-b03f-f15aa89f39e8\": \"initiative, determination and curiosity are essential qualities. Participants should also be committed team players eager to tackle collaborative work. Candidates need not have\",\n", - " \"60844039-fbfb-48ba-bd8a-824c0fc36935\": \"Let\\u2019s take a\"\n", - "}\n" + "total 1476568\n", + "drwxr-xr-x 10 root root 4096 Jul 18 10:22 .\n", + "drwxr-xr-x 1 root root 4096 Jul 18 10:11 ..\n", + "-rw-r--r-- 1 root root 2 Jul 18 10:18 added_tokens.json\n", + "-rw-r--r-- 1 root root 815 Jul 18 10:11 api.py\n", + "-rw-r--r-- 1 root root 22 Jul 18 10:11 apt.txt\n", + "-rw-r--r-- 1 root root 476428958 Jul 18 10:23 bert_qa_vCPU.joblib\n", + "-rw-r--r-- 1 root root 63330 Jul 18 10:19 cached_dev_xlnet-base-cased_384\n", + "-rw-r--r-- 1 root root 101136 Jul 18 10:12 cached_train_xlnet-base-cased_384\n", + "drwxr-xr-x 7 root root 4096 Jul 18 10:12 cdqa\n", + "drwxr-xr-x 2 root root 4096 Jul 18 10:12 cdqa.egg-info\n", + "-rw-r--r-- 1 root root 641 Jul 18 10:18 config.json\n", + "drwxr-xr-x 3 root root 4096 Jul 18 10:19 data\n", + "-rw-r--r-- 1 root root 4854279 Jul 18 10:12 dev-v1.1.json\n", + "-rw-r--r-- 1 root root 4370528 Jul 18 10:12 dev-v2.0.json\n", + "-rw-r--r-- 1 root root 8786 Jul 18 10:12 dev-v2.0-small.json\n", + "-rw-r--r-- 1 root root 1452 Jul 18 10:11 download.py\n", + "drwxr-xr-x 2 root root 4096 Jul 18 10:11 examples\n", + "drwxr-xr-x 8 root root 4096 Jul 18 10:11 .git\n", + "drwxr-xr-x 3 root root 4096 Jul 18 10:11 .github\n", + "-rw-r--r-- 1 root root 1375 Jul 18 10:11 .gitignore\n", + "-rw-r--r-- 1 root root 11356 Jul 18 10:11 LICENSE\n", + "-rw-r--r-- 1 root root 40 Jul 18 10:11 MANIFEST.in\n", + "-rw-r--r-- 1 root root 12357 Jul 18 10:20 nbest_predictions_.json\n", + "-rw-r--r-- 1 root root 723 Jul 18 10:20 null_odds_.json\n", + "-rw-r--r-- 1 root root 1349 Jul 18 10:20 predictions_.json\n", + "-rw-r--r-- 1 root root 476371987 Jul 18 10:18 pytorch_model.bin\n", + "-rw-r--r-- 1 root root 12855 Jul 18 10:11 README.md\n", + "-rw-r--r-- 1 root root 129 Jul 18 10:11 requirements.txt\n", + "drwxr-xr-x 3 root root 4096 Jul 18 10:12 runs\n", + "-rw-r--r-- 1 root root 727 Jul 18 10:11 setup.py\n", + "-rw-r--r-- 1 root root 202 Jul 18 10:18 special_tokens_map.json\n", + "-rw-r--r-- 1 root root 798011 Jul 18 10:18 spiece.model\n", + "drwxr-xr-x 2 root root 4096 Jul 18 10:11 tests\n", + "-rw-r--r-- 1 root root 1180 Jul 18 10:18 training_args.bin\n", + "-rw-r--r-- 1 root root 30288272 Jul 18 10:12 train-v1.1.json\n", + "-rw-r--r-- 1 root root 42123633 Jul 18 10:12 train-v2.0.json\n", + "-rw-r--r-- 1 root root 312 Jul 18 10:11 .travis.yml\n", + "-rw-r--r-- 1 root root 
476428958 Jul 18 10:22 xlnet_qa_vGPU.joblib\n" ], "name": "stdout" } ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pWV-nY5Gm3KF", + "colab_type": "code", + "colab": {} + }, + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] } ] } \ No newline at end of file From 640f984ec014099fccffff6eb130a9345ae37eeb Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Thu, 18 Jul 2019 14:39:29 +0200 Subject: [PATCH 35/43] quick fix error evaluation during training --- cdqa/reader/reader_sklearn.py | 2 -- examples/tutorial-train-xlnet-squad.py | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index a8778401..d7fcccfd 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -388,8 +388,6 @@ def __init__(self, max_seq_length=384, doc_stride=128, max_query_length=64, - do_train=True, - do_eval=True, evaluate_during_training=True, do_lower_case=True, per_gpu_train_batch_size=8, diff --git a/examples/tutorial-train-xlnet-squad.py b/examples/tutorial-train-xlnet-squad.py index c6af072e..54573ca5 100644 --- a/examples/tutorial-train-xlnet-squad.py +++ b/examples/tutorial-train-xlnet-squad.py @@ -16,8 +16,10 @@ # cast Reader class with train params reader = Reader(model_type='xlnet', model_name_or_path='xlnet-base-cased', - fp16=False, - output_dir='.') + output_dir='.', + evaluate_during_training=False, + no_cuda=True, + fp16=False) # train the model reader.fit(X='train-v2.0.json') From 993ac5e4d8dc5cbb57db033d7bceae7ddbd77310 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Thu, 18 Jul 2019 14:41:10 +0200 Subject: [PATCH 36/43] fix no_cuda --- examples/tutorial-train-xlnet-squad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tutorial-train-xlnet-squad.py b/examples/tutorial-train-xlnet-squad.py index 54573ca5..290f7c71 100644 --- a/examples/tutorial-train-xlnet-squad.py +++ b/examples/tutorial-train-xlnet-squad.py @@ -18,7 +18,7 @@ model_name_or_path='xlnet-base-cased', output_dir='.', evaluate_during_training=False, - no_cuda=True, + no_cuda=False, fp16=False) # train the model From e17db67712100404cc45412e08156289dbedcd5d Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Fri, 19 Jul 2019 15:39:49 +0200 Subject: [PATCH 37/43] keep basic tokenizer when using pretrained model --- cdqa/reader/reader_sklearn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index d7fcccfd..d0252dd2 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -513,7 +513,7 @@ def __init__(self, if self.pretrained_model_path: # Load a trained model and vocabulary that you have fine-tuned self.model = self.model_class.from_pretrained(self.pretrained_model_path) - self.tokenizer = tokenizer_class.from_pretrained(self.pretrained_model_path) + # self.tokenizer = tokenizer_class.from_pretrained(self.pretrained_model_path) self.model.to(self.device) def fit(self, X, y=None): From 8979d0285f427faccad61b271ca5926fbd6b370a Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Fri, 19 Jul 2019 16:10:28 +0200 Subject: [PATCH 38/43] add notebook tutorial predict with XLNet on custom dataset --- examples/tutorial-predict-xlnet.ipynb | 633 ++++++++++++++++++++++++++ 1 file changed, 633 insertions(+) create mode 
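
Taken together, the three fixes above set how these tutorials now drive the
Reader: evaluation during training is switched off (the quick fix for the
evaluation error), CUDA stays enabled, and a Reader built with
pretrained_model_path keeps the tokenizer constructed from model_name_or_path
instead of reloading one from the fine-tuned directory. A minimal sketch of
the resulting training entry point; the flag values are copied from
examples/tutorial-train-xlnet-squad.py above, and the import path mirrors the
notebook added below:

    from cdqa.reader.reader_sklearn import Reader

    # Flags as set by the fixes above: no mid-training evaluation,
    # CUDA enabled, mixed precision off.
    reader = Reader(model_type='xlnet',
                    model_name_or_path='xlnet-base-cased',
                    output_dir='.',
                    evaluate_during_training=False,
                    no_cuda=False,
                    fp16=False)

    # Fine-tune on SQuAD 2.0; features are cached on first run
    # (the cached_train_xlnet-base-cased_384 file seen in the listings).
    reader.fit(X='train-v2.0.json')
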
100644 examples/tutorial-predict-xlnet.ipynb diff --git a/examples/tutorial-predict-xlnet.ipynb b/examples/tutorial-predict-xlnet.ipynb new file mode 100644 index 00000000..67eb39bd --- /dev/null +++ b/examples/tutorial-predict-xlnet.ipynb @@ -0,0 +1,633 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "tutorial-predict-pipeline.ipynb", + "version": "0.3.2", + "provenance": [] + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "zNtCqwveFjcK", + "colab_type": "code", + "outputId": "bebb6659-aae7-43b3-d5d8-7fd506d6fa04", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 151 + } + }, + "source": [ + "!git clone https://github.com/cdqa-suite/cdQA.git" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'cdQA'...\n", + "remote: Enumerating objects: 203, done.\u001b[K\n", + "remote: Counting objects: 100% (203/203), done.\u001b[K\n", + "remote: Compressing objects: 100% (133/133), done.\u001b[K\n", + "remote: Total 999 (delta 136), reused 131 (delta 70), pack-reused 796\u001b[K\n", + "Receiving objects: 100% (999/999), 391.69 KiB | 1.36 MiB/s, done.\n", + "Resolving deltas: 100% (603/603), done.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "v2XvXm4bFp7h", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "cwd = os.getcwd()\n", + "os.chdir(\"cdQA\")" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "5jBtSKczGF38", + "colab_type": "code", + "outputId": "68ed6619-a262-4275-b466-292538f2748e", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 55 + } + }, + "source": [ + "!git checkout sync-huggingface" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Branch 'sync-huggingface' set up to track remote branch 'sync-huggingface' from 'origin'.\n", + "Switched to a new branch 'sync-huggingface'\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DHl2HUX1GRd6", + "colab_type": "code", + "outputId": "8609ade1-505f-4319-e488-caaa4c573376", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 170 + } + }, + "source": [ + "!pip install -q -e ." + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\u001b[K |████████████████████████████████| 133kB 9.7MB/s \n", + "\u001b[K |████████████████████████████████| 143kB 44.4MB/s \n", + "\u001b[K |████████████████████████████████| 225kB 42.5MB/s \n", + "\u001b[K |████████████████████████████████| 655kB 37.7MB/s \n", + "\u001b[K |████████████████████████████████| 1.0MB 43.3MB/s \n", + "\u001b[?25h Building wheel for tika (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for wget (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for regex (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-25T14:21:08.091797Z", + "start_time": "2019-06-25T14:21:03.027877Z" + }, + "id": "umJkmO9HFf3L", + "colab_type": "code", + "outputId": "973576cb-9d0b-4348-a057-99261db16626", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 75 + } + }, + "source": [ + "import os\n", + "import torch\n", + "from sklearn.externals import joblib\n", + "from cdqa.reader.reader_sklearn import Reader" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/externals/joblib/__init__.py:15: DeprecationWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. Please import this functionality directly from joblib, which can be installed with: pip install joblib. If this warning is raised when loading pickled models, you may need to re-serialize those models with scikit-learn 0.21+.\n", + " warnings.warn(msg, category=DeprecationWarning)\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XPItmXKSRxDb", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 343 + }, + "outputId": "cb21a27d-3bc5-4d08-e1f3-5f4ab98d149b" + }, + "source": [ + "!wget https://github.com/cdqa-suite/cdQA/releases/download/XLNet_cased_vCPU/pytorch_model.bin" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "text": [ + "--2019-07-19 14:01:33-- https://github.com/cdqa-suite/cdQA/releases/download/XLNet_cased_vCPU/pytorch_model.bin\n", + "Resolving github.com (github.com)... 140.82.118.4\n", + "Connecting to github.com (github.com)|140.82.118.4|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://github-production-release-asset-2e65be.s3.amazonaws.com/165645094/96b5db80-aa35-11e9-8147-fbf9e537f61c?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20190719%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20190719T140133Z&X-Amz-Expires=300&X-Amz-Signature=ae105e392fc2e960cdd1785e1b78e6aa32a10eeaa70e676dca0e6327a8f0a449&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dpytorch_model.bin&response-content-type=application%2Foctet-stream [following]\n", + "--2019-07-19 14:01:33-- https://github-production-release-asset-2e65be.s3.amazonaws.com/165645094/96b5db80-aa35-11e9-8147-fbf9e537f61c?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20190719%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20190719T140133Z&X-Amz-Expires=300&X-Amz-Signature=ae105e392fc2e960cdd1785e1b78e6aa32a10eeaa70e676dca0e6327a8f0a449&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dpytorch_model.bin&response-content-type=application%2Foctet-stream\n", + "Resolving github-production-release-asset-2e65be.s3.amazonaws.com (github-production-release-asset-2e65be.s3.amazonaws.com)... 52.216.170.227\n", + "Connecting to github-production-release-asset-2e65be.s3.amazonaws.com (github-production-release-asset-2e65be.s3.amazonaws.com)|52.216.170.227|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 476375014 (454M) [application/octet-stream]\n", + "Saving to: ‘pytorch_model.bin’\n", + "\n", + "pytorch_model.bin 100%[===================>] 454.31M 36.5MB/s in 13s \n", + "\n", + "2019-07-19 14:01:47 (34.9 MB/s) - ‘pytorch_model.bin’ saved [476375014/476375014]\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "wHIH_XHZjFRC", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 343 + }, + "outputId": "18d6d9bd-9019-490f-b291-3036e069a0ca" + }, + "source": [ + "!wget https://github.com/cdqa-suite/cdQA/releases/download/XLNet_cased_vCPU/config.json" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "--2019-07-19 14:04:57-- https://github.com/cdqa-suite/cdQA/releases/download/XLNet_cased_vCPU/config.json\n", + "Resolving github.com (github.com)... 140.82.118.3\n", + "Connecting to github.com (github.com)|140.82.118.3|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://github-production-release-asset-2e65be.s3.amazonaws.com/165645094/96b5db80-aa35-11e9-84be-890f3b56af43?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20190719%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20190719T140457Z&X-Amz-Expires=300&X-Amz-Signature=221a7412e5115bc1c8cebccf6d528a3b9f0c64b13e3487e75b185135d70a2f85&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dconfig.json&response-content-type=application%2Foctet-stream [following]\n", + "--2019-07-19 14:04:57-- https://github-production-release-asset-2e65be.s3.amazonaws.com/165645094/96b5db80-aa35-11e9-84be-890f3b56af43?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20190719%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20190719T140457Z&X-Amz-Expires=300&X-Amz-Signature=221a7412e5115bc1c8cebccf6d528a3b9f0c64b13e3487e75b185135d70a2f85&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dconfig.json&response-content-type=application%2Foctet-stream\n", + "Resolving github-production-release-asset-2e65be.s3.amazonaws.com (github-production-release-asset-2e65be.s3.amazonaws.com)... 52.216.80.24\n", + "Connecting to github-production-release-asset-2e65be.s3.amazonaws.com (github-production-release-asset-2e65be.s3.amazonaws.com)|52.216.80.24|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 641 [application/octet-stream]\n", + "Saving to: ‘config.json’\n", + "\n", + "\rconfig.json 0%[ ] 0 --.-KB/s \rconfig.json 100%[===================>] 641 --.-KB/s in 0s \n", + "\n", + "2019-07-19 14:04:57 (18.1 MB/s) - ‘config.json’ saved [641/641]\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pWV-nY5Gm3KF", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "c1046f6e-ef80-4fcf-a8c4-c9058dc3d7e3" + }, + "source": [ + "# cast Reader class with train params\n", + "reader = Reader(model_type='xlnet',\n", + " model_name_or_path='xlnet-base-cased',\n", + " output_dir='.',\n", + " evaluate_during_training=False,\n", + " no_cuda=False,\n", + " fp16=False,\n", + " pretrained_model_path='.')" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "07/19/2019 14:05:05 - WARNING - cdqa.reader.reader_sklearn - Process rank: -1, device: cuda, n_gpu: 1, distributed training: False, 16-bits training: False\n", + "07/19/2019 14:05:05 - INFO - pytorch_transformers.modeling_utils - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json from cache at /root/.cache/torch/pytorch_transformers/c9cc6e53904f7f3679a31ec4af244f4419e25ebc8e71ebf8c558a31cbcf07fc8.ef1824921bc0786e97dc88d55eb17aabf18aac90f24bd34c0650529e7ba27d6f\n", + "07/19/2019 14:05:05 - INFO - pytorch_transformers.modeling_utils - Model config {\n", + " \"attn_type\": \"bi\",\n", + " \"bi_data\": false,\n", + " \"clamp_len\": -1,\n", + " \"d_head\": 64,\n", + " \"d_inner\": 3072,\n", + " \"d_model\": 768,\n", + " \"dropout\": 0.1,\n", + " \"end_n_top\": 5,\n", + " \"ff_activation\": \"gelu\",\n", + " \"finetuning_task\": null,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"mem_len\": null,\n", + " \"n_head\": 12,\n", + " \"n_layer\": 12,\n", + " \"n_token\": 32000,\n", + " \"num_labels\": 2,\n", + " \"output_attentions\": false,\n", + " \"output_hidden_states\": false,\n", + " \"reuse_len\": null,\n", + " \"same_length\": false,\n", + " \"start_n_top\": 5,\n", + " \"summary_activation\": \"tanh\",\n", + " \"summary_last_dropout\": 0.1,\n", + " \"summary_type\": \"last\",\n", + " \"summary_use_proj\": true,\n", + " \"torchscript\": false,\n", + " \"untie_r\": true\n", + "}\n", + "\n", + "07/19/2019 14:05:06 - INFO - pytorch_transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8\n", + "07/19/2019 14:05:06 - INFO - pytorch_transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin from cache at /root/.cache/torch/pytorch_transformers/24197ba0ce5dbfe23924431610704c88e2c0371afa49149360e4c823219ab474.7eac4fe898a021204e63c88c00ea68c60443c57f94b4bc3c02adbde6465745ac\n", + "07/19/2019 14:05:10 - INFO - pytorch_transformers.modeling_utils - Weights of XLNetForQuestionAnswering not initialized from pretrained model: ['start_logits.dense.weight', 'start_logits.dense.bias', 'end_logits.dense_0.weight', 'end_logits.dense_0.bias', 'end_logits.LayerNorm.weight', 'end_logits.LayerNorm.bias', 'end_logits.dense_1.weight', 'end_logits.dense_1.bias', 'answer_class.dense_0.weight', 
'answer_class.dense_0.bias', 'answer_class.dense_1.weight']\n", + "07/19/2019 14:05:10 - INFO - pytorch_transformers.modeling_utils - Weights from pretrained model not used in XLNetForQuestionAnswering: ['lm_loss.weight', 'lm_loss.bias']\n", + "07/19/2019 14:05:10 - INFO - cdqa.reader.reader_sklearn - Training/evaluation parameters Reader(adam_epsilon=1e-08, cache_dir='', config_name='', do_lower_case=True,\n", + " doc_stride=128, eval_all_checkpoints=True,\n", + " evaluate_during_training=False, fp16=False, fp16_opt_level='O1',\n", + " gradient_accumulation_steps=1, learning_rate=5e-05, local_rank=-1,\n", + " logging_steps=50, max_answer_length=30, max_grad_norm=1.0,\n", + " max_query_length=64, max_seq_length=384, max_steps=-1,\n", + " model_name_or_path='xlnet-base-cased', model_type='xlnet',\n", + " n_best_size=20, no_cuda=False, null_score_diff_threshold=0.0,\n", + " num_train_epochs=3.0, output_dir='.', overwrite_cache=True,\n", + " overwrite_output_dir=True, per_gpu_eval_batch_size=8,\n", + " per_gpu_train_batch_size=8, pretrained_model_path='.', ...)\n", + "07/19/2019 14:05:10 - INFO - pytorch_transformers.modeling_utils - loading configuration file ./config.json\n", + "07/19/2019 14:05:10 - INFO - pytorch_transformers.modeling_utils - Model config {\n", + " \"attn_type\": \"bi\",\n", + " \"bi_data\": false,\n", + " \"clamp_len\": -1,\n", + " \"d_head\": 64,\n", + " \"d_inner\": 3072,\n", + " \"d_model\": 768,\n", + " \"dropout\": 0.1,\n", + " \"end_n_top\": 5,\n", + " \"ff_activation\": \"gelu\",\n", + " \"finetuning_task\": null,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"mem_len\": null,\n", + " \"n_head\": 12,\n", + " \"n_layer\": 12,\n", + " \"n_token\": 32000,\n", + " \"num_labels\": 2,\n", + " \"output_attentions\": false,\n", + " \"output_hidden_states\": false,\n", + " \"reuse_len\": null,\n", + " \"same_length\": false,\n", + " \"start_n_top\": 5,\n", + " \"summary_activation\": \"tanh\",\n", + " \"summary_last_dropout\": 0.1,\n", + " \"summary_type\": \"last\",\n", + " \"summary_use_proj\": true,\n", + " \"torchscript\": false,\n", + " \"untie_r\": true\n", + "}\n", + "\n", + "07/19/2019 14:05:10 - INFO - pytorch_transformers.modeling_utils - loading weights file ./pytorch_model.bin\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uizhui4vbRT7", + "colab_type": "code", + "outputId": "4ddd25c8-1cb1-49cd-be65-993c3bb895c8", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 55 + } + }, + "source": [ + "import os\n", + "import wget\n", + "\n", + "def download_bnp_data():\n", + " directory = 'data/bnpp_newsroom_v1.1'\n", + " url = 'https://github.com/cdqa-suite/cdQA/releases/download/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv'\n", + "\n", + " print(\"\\nDownloading BNP data...\")\n", + "\n", + " if not os.path.exists(directory):\n", + " os.makedirs(directory)\n", + "\n", + " wget.download(url=url, out=directory)\n", + "\n", + "download_bnp_data()" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\n", + "Downloading BNP data...\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "AxTkvynSbOyh", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import pandas as pd\n", + "from ast import literal_eval\n", + "from cdqa.utils.filters import filter_paragraphs\n", + "\n", + "df = pd.read_csv('data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv', converters={'paragraphs': literal_eval})\n", + "df = 
filter_paragraphs(df)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "KQ9Be2rzZYQb", + "colab_type": "code", + "outputId": "76b21f2a-a689-4f60-eea5-76799e33fe0c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 189 + } + }, + "source": [ + "from cdqa.utils.converters import generate_squad_examples\n", + "from cdqa.retriever.tfidf_sklearn import TfidfRetriever\n", + "\n", + "query = 'Who is Jean-Laurent Bonnafé?'\n", + "\n", + "metadata = df\n", + "metadata['content'] = metadata['paragraphs'].apply(lambda x: ' '.join(x))\n", + "\n", + "retriever = TfidfRetriever(verbose=True)\n", + "retriever.fit(metadata['content'])\n", + "closest_docs_indices = retriever.predict(query, metadata=metadata)\n", + "\n", + "squad_examples = generate_squad_examples(question=query,\n", + " closest_docs_indices=closest_docs_indices,\n", + " metadata=metadata)" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "text": [ + "3it [00:00, 2033.11it/s]" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "+------+-------+---------------------------------------------------------+\n", + "| rank | index | title |\n", + "+------+-------+---------------------------------------------------------+\n", + "| 1 | 759 | Back on Hello Tomorrow Global Summit 2016 |\n", + "| 2 | 611 | BNP Paribas wishes to become carbon neutral by end-2017 |\n", + "| 3 | 1266 | Jean-Laurent Bonnafé named top CEO in banking sector |\n", + "+------+-------+---------------------------------------------------------+\n", + "Time: 0.0076 seconds\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "9YSGLgDUhruS", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "4287d704-65c6-471e-9682-bde31b1f90fb" + }, + "source": [ + "# train the model\n", + "out_eval, final_prediction = reader.predict(X=squad_examples)" + ], + "execution_count": 20, + "outputs": [ + { + "output_type": "stream", + "text": [ + "07/19/2019 14:07:40 - INFO - cdqa.reader.reader_sklearn - Creating features from dataset file at [{'title': 'Back on Hello Tomorrow Global Summit 2016', 'paragraphs': [{'context': \"As actors and facilitators of change, BNP Paribas et L’Atelier BNP Paribas are privileged partners of Hello Tomorrow Global Summit, held on 13 and 14 October. \\r\\nRelive that great moment of exchange, which particularly welcomed Jean-Laurent Bonnafé, Director and CEO of the Group, and Jacques d'Estais, Deputy Chief Operating Officer and Head of International Financial Services.\", 'qas': [{'answers': [], 'question': 'Who is Jean-Laurent Bonnafé?', 'id': 'e8725a52-b104-4f4d-a42e-0fdbccb51a33'}]}]}, {'title': 'BNP Paribas wishes to become carbon neutral by end-2017', 'paragraphs': [{'context': 'Jean-Laurent Bonnafé, BNP Paribas Chief Executive Officer said: “Over the last few years, we’ve taken some significant steps with our climate-related policies. For instance, we doubled the funds earmarked for financing in the renewable energy field – to €15 billion by 2020 – and also decided to cease financing coal-fired power plant projects. 
This new target of making our own operations ‘carbon-neutral’ will enable us to take our contribution to limiting global warming a stage further.”', 'qas': [{'answers': [], 'question': 'Who is Jean-Laurent Bonnafé?', 'id': '6024c3fa-0236-413f-8e67-c46158b34058'}]}]}, {'title': 'Jean-Laurent Bonnafé named top CEO in banking sector', 'paragraphs': [{'context': 'Extel 2014 Survey (16,000 professionals in the financial sector): Jean-Laurent Bonnafé has been ranked No. 1 manager (out of 86) in the European banking sector in the “CEO - Banking Sector” Category in Europe. Lars Machenil, BNP Paribas’ Chief Financial Officer, is ranked second (out of 93) in the “CFO - Banking Sector” Category. In addition, the bank is ranked No. 3 (out of 105) for the quality of its investor relations with the financial market.', 'qas': [{'answers': [], 'question': 'Who is Jean-Laurent Bonnafé?', 'id': '1dd6ca38-7ff6-41c8-a4af-41ff80e85a08'}]}, {'context': 'Following the publication of this ranking, Jean-Laurent Bonnafé made the following comments : “The leadership position achieved by BNP Paribas rewards its global performance, as perceived by financial analysts. It also demonstrates the quality of the relations and financial communications that we maintain with all the operators in the financial sector. This is excellent news, especially at the current time. The credit goes to all the BNP Paribas Group teams”.', 'qas': [{'answers': [], 'question': 'Who is Jean-Laurent Bonnafé?', 'id': 'b7dda590-525d-4d56-938d-9b05b01bedba'}]}, {'context': 'The Extel (a Thomson Reuters Division) Survey is a leading benchmark survey for the financial survey. It is based on the votes of professionals in the financial sector. This year, the survey was conducted between 24 March and 7 May. This year’s survey gathered votes from more than 15,000 buyside professionals representing more than 2,000 funds, 2,500 sellside professionals from 270 brokerage firms and more than 1,000 investment professionals from nearly 800 corporates. The sample group of voters in this survey is very representative of the financial sector, so the ranking is closely followed, particularly by analysts and brokers.', 'qas': [{'answers': [], 'question': 'Who is Jean-Laurent Bonnafé?', 'id': 'c285e1ca-9009-4333-bc70-1132a1cfc0bd'}]}]}]\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - unique_id: 1000000000\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - example_index: 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁is ▁ je an - lau rent ▁ bon na fe ? [SEP] ▁as ▁actors ▁and ▁facilitator s ▁of ▁change , ▁ b n p ▁ pari bas ▁ et ▁ l ’ ate lier ▁ b n p ▁ pari bas ▁are ▁privileged ▁partners ▁of ▁hello ▁tomorrow ▁global ▁summit , ▁held ▁on ▁13 ▁and ▁14 ▁ oc to ber . ▁ re live ▁that ▁great ▁moment ▁of ▁exchange , ▁which ▁particularly ▁welcomed ▁ je an - lau rent ▁ bon na fe , ▁director ▁and ▁ ce o ▁of ▁the ▁group , ▁and ▁ jac que s ▁ d ' esta is , ▁deputy ▁chief ▁operating ▁officer ▁and ▁head ▁of ▁international ▁financial ▁services . 
[SEP]\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:1 17:2 18:3 19:3 20:4 21:5 22:5 23:6 24:6 25:6 26:6 27:7 28:7 29:7 30:8 31:8 32:9 33:9 34:9 35:9 36:9 37:10 38:10 39:10 40:10 41:11 42:11 43:11 44:12 45:13 46:14 47:15 48:16 49:17 50:18 51:19 52:19 53:20 54:21 55:22 56:23 57:24 58:25 59:25 60:25 61:25 62:25 63:26 64:26 65:26 66:27 67:28 68:29 69:30 70:31 71:31 72:32 73:33 74:34 75:35 76:35 77:35 78:35 79:35 80:35 81:36 82:36 83:36 84:36 85:36 86:37 87:38 88:39 89:39 90:39 91:40 92:41 93:42 94:42 95:43 96:44 97:44 98:44 99:44 100:45 101:45 102:45 103:45 104:45 105:45 106:46 107:47 108:48 109:49 110:50 111:51 112:52 113:53 114:54 115:55 116:55\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 27 17 2554 262 13 9951 9663 17 4769 597 4018 82 0 34 6454 21 30181 23 20 459 19 17 508 180 450 17 21605 7522 17 993 17 368 165 1167 8805 17 508 180 450 17 21605 7522 41 20334 3221 20 24717 4305 1150 2519 19 355 31 646 21 613 17 3374 261 2266 9 17 88 8032 29 312 1070 20 1725 19 59 1446 6442 17 2554 262 13 9951 9663 17 4769 597 4018 19 748 21 17 1138 155 20 18 256 19 21 17 21690 1895 23 17 66 26 19641 590 19 3071 735 2028 1674 21 291 20 440 638 472 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - 
cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - unique_id: 1000000001\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - example_index: 1\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁is ▁ je an - lau rent ▁ bon na fe ? [SEP] ▁ je an - lau rent ▁ bon na fe , ▁ b n p ▁ pari bas ▁chief ▁executive ▁officer ▁said : ▁“ over ▁the ▁last ▁few ▁years , ▁we ’ ve ▁taken ▁some ▁significant ▁steps ▁with ▁our ▁climate - related ▁policies . ▁for ▁instance , ▁we ▁doubled ▁the ▁funds ▁earmarked ▁for ▁financing ▁in ▁the ▁renewable ▁energy ▁field ▁ – ▁to ▁ € 15 ▁billion ▁by ▁2020 ▁ – ▁and ▁also ▁decided ▁to ▁cease ▁financing ▁coal - fired ▁power ▁plant ▁projects . ▁this ▁new ▁target ▁of ▁making ▁our ▁own ▁operations ▁‘ carbon - neutral ’ ▁will ▁enable ▁us ▁to ▁take ▁our ▁contribution ▁to ▁limiting ▁global ▁warming ▁a ▁stage ▁further . 
” [SEP]\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:0 17:0 18:0 19:0 20:0 21:1 22:1 23:1 24:1 25:1 26:2 27:2 28:2 29:2 30:3 31:3 32:3 33:4 34:5 35:6 36:7 37:7 38:8 39:8 40:9 41:10 42:11 43:12 44:12 45:13 46:13 47:13 48:14 49:15 50:16 51:17 52:18 53:19 54:20 55:20 56:20 57:21 58:21 59:22 60:23 61:23 62:24 63:25 64:26 65:27 66:28 67:29 68:30 69:31 70:32 71:33 72:34 73:35 74:36 75:36 76:37 77:38 78:38 79:38 80:39 81:40 82:41 83:42 84:42 85:43 86:44 87:45 88:46 89:47 90:48 91:49 92:49 93:49 94:50 95:51 96:52 97:52 98:53 99:54 100:55 101:56 102:57 103:58 104:59 105:60 106:61 107:61 108:61 109:61 110:61 111:62 112:63 113:64 114:65 115:66 116:67 117:68 118:69 119:70 120:71 121:72 122:73 123:74 124:75 125:75 126:75\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 27 17 2554 262 13 9951 9663 17 4769 597 4018 82 0 17 2554 262 13 9951 9663 17 4769 597 4018 19 17 508 180 450 17 21605 7522 735 1317 1674 42 60 221 2249 18 129 274 123 19 80 165 189 572 106 1376 2094 33 120 2749 13 3361 2099 9 28 3431 19 80 9302 18 1660 27786 28 5303 25 18 12185 861 770 17 14 22 17 16 1522 337 37 15765 17 14 21 77 969 22 5951 5303 4780 13 18874 350 1649 1526 9 52 109 1983 20 441 120 224 1354 2302 19542 13 24734 165 53 4520 211 22 182 120 5313 22 12597 1150 8220 24 1269 608 9 407 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - unique_id: 1000000002\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - example_index: 2\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁is ▁ je an - lau rent ▁ bon na fe ? [SEP] ▁ex tel ▁2014 ▁survey ▁ ( 1 6,000 ▁professionals ▁in ▁the ▁financial ▁sector ) : ▁ je an - lau rent ▁ bon na fe ▁has ▁been ▁ranked ▁no . ▁1 ▁manager ▁ ( out ▁of ▁86 ) ▁in ▁the ▁ european ▁banking ▁sector ▁in ▁the ▁“ ce o ▁ - ▁banking ▁sector ” ▁category ▁in ▁euro pe . ▁ lar s ▁ ma chen il , ▁ b n p ▁ pari bas ’ ▁chief ▁financial ▁officer , ▁is ▁ranked ▁second ▁ ( out ▁of ▁93 ) ▁in ▁the ▁“ c fo ▁ - ▁banking ▁sector ” ▁category . ▁in ▁addition , ▁the ▁bank ▁is ▁ranked ▁no . ▁3 ▁ ( out ▁of ▁105 ) ▁for ▁the ▁quality ▁of ▁its ▁investor ▁relations ▁with ▁the ▁financial ▁market . 
[SEP]\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:0 17:1 18:2 19:3 20:3 21:3 22:3 23:4 24:5 25:6 26:7 27:8 28:8 29:8 30:9 31:9 32:9 33:9 34:9 35:9 36:10 37:10 38:10 39:10 40:11 41:12 42:13 43:14 44:14 45:15 46:16 47:17 48:17 49:17 50:18 51:19 52:19 53:20 54:21 55:22 56:22 57:23 58:24 59:25 60:26 61:27 62:27 63:27 64:28 65:28 66:29 67:30 68:30 69:31 70:32 71:33 72:33 73:33 74:34 75:34 76:34 77:35 78:35 79:35 80:35 81:35 82:36 83:36 84:36 85:36 86:37 87:37 88:37 89:37 90:38 91:39 92:40 93:40 94:41 95:42 96:43 97:44 98:44 99:44 100:45 101:46 102:46 103:47 104:48 105:49 106:49 107:49 108:50 109:50 110:51 111:52 112:52 113:53 114:53 115:54 116:55 117:55 118:56 119:57 120:58 121:59 122:60 123:60 124:61 125:62 126:62 127:62 128:63 129:64 130:64 131:65 132:66 133:67 134:68 135:69 136:70 137:71 138:72 139:73 140:74 141:75 142:75\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True 139:True 140:True 141:True 142:True\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 27 17 2554 262 13 9951 9663 17 4769 597 4018 82 0 2002 4258 2502 2342 17 10 174 8726 4301 25 18 638 1967 11 60 17 2554 262 13 9951 9663 17 4769 597 4018 51 72 4766 116 9 156 1416 17 10 1281 20 11235 11 25 18 17 30707 4236 1967 25 18 221 1138 155 17 13 4236 1967 407 3242 25 2926 1590 9 17 4225 23 17 661 8258 902 19 17 508 180 450 17 21605 7522 165 735 638 1674 19 27 4766 205 17 10 1281 20 12306 11 25 18 221 369 6571 17 13 4236 1967 407 3242 9 25 864 19 18 1013 27 4766 116 9 198 17 10 1281 20 12614 11 28 18 882 20 81 8146 1704 33 18 638 344 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - unique_id: 1000000003\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - example_index: 3\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁is ▁ je an - lau rent ▁ bon na fe ? [SEP] ▁following ▁the ▁publication ▁of ▁this ▁ranking , ▁ je an - lau rent ▁ bon na fe ▁made ▁the ▁following ▁comments ▁ : ▁“ the ▁leadership ▁position ▁achieved ▁by ▁ b n p ▁ pari bas ▁rewards ▁its ▁global ▁performance , ▁as ▁perceived ▁by ▁financial ▁analysts . ▁it ▁also ▁demonstrates ▁the ▁quality ▁of ▁the ▁relations ▁and ▁financial ▁communications ▁that ▁we ▁maintain ▁with ▁all ▁the ▁operators ▁in ▁the ▁financial ▁sector . ▁this ▁is ▁excellent ▁news , ▁especially ▁at ▁the ▁current ▁time . ▁the ▁credit ▁goes ▁to ▁all ▁the ▁ b n p ▁ pari bas ▁group ▁teams ” . 
[SEP]\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:1 17:2 18:3 19:4 20:5 21:5 22:6 23:6 24:6 25:6 26:6 27:6 28:7 29:7 30:7 31:7 32:8 33:9 34:10 35:11 36:12 37:12 38:13 39:13 40:14 41:15 42:16 43:17 44:18 45:18 46:18 47:18 48:19 49:19 50:19 51:20 52:21 53:22 54:23 55:23 56:24 57:25 58:26 59:27 60:28 61:28 62:29 63:30 64:31 65:32 66:33 67:34 68:35 69:36 70:37 71:38 72:39 73:40 74:41 75:42 76:43 77:44 78:45 79:46 80:47 81:48 82:49 83:50 84:50 85:51 86:52 87:53 88:54 89:54 90:55 91:56 92:57 93:58 94:59 95:59 96:60 97:61 98:62 99:63 100:64 101:65 102:66 103:66 104:66 105:66 106:67 107:67 108:67 109:68 110:69 111:69 112:69\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 27 17 2554 262 13 9951 9663 17 4769 597 4018 82 0 405 18 3638 20 52 7055 19 17 2554 262 13 9951 9663 17 4769 597 4018 140 18 405 1992 17 60 221 305 2041 740 3741 37 17 508 180 450 17 21605 7522 13405 81 1150 922 19 34 8634 37 638 2604 9 36 77 14800 18 882 20 18 1704 21 638 3964 29 80 2224 33 71 18 7466 25 18 638 1967 9 52 27 2712 546 19 941 38 18 604 92 9 18 734 1565 22 71 18 17 508 180 450 17 21605 7522 256 1314 407 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - *** Example ***\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - unique_id: 1000000004\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - example_index: 4\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - doc_span_index: 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - tokens: [CLS] ▁who ▁is ▁ je an - lau rent ▁ bon na fe ? [SEP] ▁the ▁ex tel ▁ ( a ▁ t hom son ▁ re uter s ▁division ) ▁survey ▁is ▁a ▁leading ▁benchmark ▁survey ▁for ▁the ▁financial ▁survey . ▁it ▁is ▁based ▁on ▁the ▁votes ▁of ▁professionals ▁in ▁the ▁financial ▁sector . ▁this ▁year , ▁the ▁survey ▁was ▁conducted ▁between ▁24 ▁march ▁and ▁7 ▁may . ▁this ▁year ’ s ▁survey ▁gathered ▁votes ▁from ▁more ▁than ▁15,000 ▁buy side ▁professionals ▁representing ▁more ▁than ▁2,000 ▁funds , ▁2,500 ▁sell side ▁professionals ▁from ▁27 0 ▁brokerage ▁firms ▁and ▁more ▁than ▁1,000 ▁investment ▁professionals ▁from ▁nearly ▁800 ▁corporate s . ▁the ▁sample ▁group ▁of ▁voters ▁in ▁this ▁survey ▁is ▁very ▁representative ▁of ▁the ▁financial ▁sector , ▁so ▁the ▁ranking ▁is ▁closely ▁followed , ▁particularly ▁by ▁analysts ▁and ▁brokers . 
[SEP]\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - token_to_orig_map: 15:0 16:1 17:1 18:2 19:2 20:2 21:3 22:3 23:3 24:3 25:4 26:4 27:4 28:4 29:5 30:5 31:6 32:7 33:8 34:9 35:10 36:11 37:12 38:13 39:14 40:15 41:15 42:16 43:17 44:18 45:19 46:20 47:21 48:22 49:23 50:24 51:25 52:26 53:27 54:27 55:28 56:29 57:29 58:30 59:31 60:32 61:33 62:34 63:35 64:36 65:37 66:38 67:39 68:39 69:40 70:41 71:41 72:41 73:42 74:43 75:44 76:45 77:46 78:47 79:48 80:49 81:49 82:50 83:51 84:52 85:53 86:54 87:55 88:55 89:56 90:57 91:57 92:58 93:59 94:60 95:60 96:61 97:62 98:63 99:64 100:65 101:66 102:67 103:68 104:69 105:70 106:71 107:72 108:72 109:72 110:73 111:74 112:75 113:76 114:77 115:78 116:79 117:80 118:81 119:82 120:83 121:84 122:85 123:86 124:87 125:87 126:88 127:89 128:90 129:91 130:92 131:93 132:93 133:94 134:95 135:96 136:97 137:98 138:98\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - token_is_max_context: 15:True 16:True 17:True 18:True 19:True 20:True 21:True 22:True 23:True 24:True 25:True 26:True 27:True 28:True 29:True 30:True 31:True 32:True 33:True 34:True 35:True 36:True 37:True 38:True 39:True 40:True 41:True 42:True 43:True 44:True 45:True 46:True 47:True 48:True 49:True 50:True 51:True 52:True 53:True 54:True 55:True 56:True 57:True 58:True 59:True 60:True 61:True 62:True 63:True 64:True 65:True 66:True 67:True 68:True 69:True 70:True 71:True 72:True 73:True 74:True 75:True 76:True 77:True 78:True 79:True 80:True 81:True 82:True 83:True 84:True 85:True 86:True 87:True 88:True 89:True 90:True 91:True 92:True 93:True 94:True 95:True 96:True 97:True 98:True 99:True 100:True 101:True 102:True 103:True 104:True 105:True 106:True 107:True 108:True 109:True 110:True 111:True 112:True 113:True 114:True 115:True 116:True 117:True 118:True 119:True 120:True 121:True 122:True 123:True 124:True 125:True 126:True 127:True 128:True 129:True 130:True 131:True 132:True 133:True 134:True 135:True 136:True 137:True 138:True\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - input_ids: 0 61 27 17 2554 262 13 9951 9663 17 4769 597 4018 82 0 18 2002 4258 17 10 101 17 46 7969 672 17 88 12105 23 2069 11 2342 27 24 895 5655 2342 28 18 638 2342 9 36 27 515 31 18 2873 20 4301 25 18 638 1967 9 52 119 19 18 2342 30 2496 161 923 7024 21 425 132 9 52 119 165 23 2342 3994 2873 40 70 100 12198 971 1943 4301 4471 70 100 6309 1660 19 16555 1523 1943 4301 40 1514 279 12250 3647 21 70 100 3823 1257 4301 40 896 6216 2348 23 9 18 4561 256 20 2326 25 52 2342 27 172 3581 20 18 638 1967 19 102 18 7055 27 3126 1060 19 1446 37 2604 21 12604 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.utils_squad - segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.reader_sklearn - Saving features into cached file cached_dev_xlnet-base-cased_384\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.reader_sklearn - ***** Running evaluation *****\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.reader_sklearn - Num examples = 5\n", + "07/19/2019 14:07:40 - INFO - cdqa.reader.reader_sklearn - Batch size = 8\n", + "Evaluating: 100%|██████████| 1/1 [00:00<00:00, 1.57it/s]\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Writing predictions to: ./predictions_.json\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'bnp paribas et l’atelier bnp paribas are privileged partners of hello tomorrow' in 'BNP Paribas et L’Atelier BNP Paribas are privileged partners of Hello Tomorrow'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'bnp paribas et l’atelier bnp paribas' in 'BNP Paribas et L’Atelier BNP Paribas'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'jean-laurent bonnafe, director and ceo of the group' in 'Jean-Laurent Bonnafé, Director and CEO of the Group,'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'paribas et l’atelier bnp paribas are privileged partners' in 'Paribas et L’Atelier BNP Paribas are privileged partners'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'paribas et l’atelier' in 'Paribas et L’Atelier'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'paribas et l’atelier bnp paribas are privileged partners of hello tomorrow global summit,' in 'Paribas et L’Atelier BNP Paribas are privileged partners of Hello Tomorrow Global Summit,'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'paribas et l’atelier bnp paribas are privileged partners of hello tomorrow global summit' in 'Paribas et L’Atelier BNP Paribas are privileged partners of Hello Tomorrow Global Summit,'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'jean-laurent bonnafe' in 'Jean-Laurent Bonnafé,'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'jean-laurent bonnafe, bnp' in 'Jean-Laurent Bonnafé, BNP'\n", + "07/19/2019 14:07:41 - INFO - 
cdqa.reader.utils_squad - Unable to find text: 'jean-laurent bonnafe, bnp' in 'Jean-Laurent Bonnafé, BNP'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: '“cfo - banking sector” category.' in '“CFO - Banking Sector” Category.'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'jean-laurent bonnafe' in 'Jean-Laurent Bonnafé'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: '“ceo - banking sector” category' in '“CEO - Banking Sector” Category'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'jean-laurent bonnafe made the following comments : “the leadership position achieved by bnp paribas' in 'Jean-Laurent Bonnafé made the following comments : “The leadership position achieved by BNP Paribas'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'this year’s survey' in 'This year’s survey'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'the extel (a thomson reuters division)' in 'The Extel (a Thomson Reuters Division)'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'the extel (a thomson reuters division) survey' in 'The Extel (a Thomson Reuters Division) Survey'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'the extel (a thomson reuters division) survey' in 'The Extel (a Thomson Reuters Division) Survey'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'extel (a thomson reuters division)' in 'Extel (a Thomson Reuters Division)'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'votes from more than 15,000 buyside professionals representing more than 2,000 funds' in 'votes from more than 15,000 buyside professionals representing more than 2,000 funds,'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'the extel (a thomson reuters division)' in 'The Extel (a Thomson Reuters Division)'\n", + "07/19/2019 14:07:41 - INFO - cdqa.reader.utils_squad - Unable to find text: 'extel (a thomson reuters division) survey' in 'Extel (a Thomson Reuters Division) Survey'\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "irjokX-mQvmY", + "colab_type": "code", + "outputId": "6e185b2d-61c4-48cd-8c40-8cfa144bcb6f", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 113 + } + }, + "source": [ + "# print('query: {}'.format(query))\n", + "print('answer: {}'.format(final_prediction[0]))\n", + "print('title: {}'.format(final_prediction[1]))\n", + "print('paragraph: {}'.format(final_prediction[2]))" + ], + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "text": [ + "answer: BNP Paribas\n", + "title: Back on Hello Tomorrow Global Summit 2016\n", + "paragraph: As actors and facilitators of change, BNP Paribas et L’Atelier BNP Paribas are privileged partners of Hello Tomorrow Global Summit, held on 13 and 14 October. 
\r\n", + "Relive that great moment of exchange, which particularly welcomed Jean-Laurent Bonnafé, Director and CEO of the Group, and Jacques d'Estais, Deputy Chief Operating Officer and Head of International Financial Services.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qRwGqhHjXPeb", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# # save GPU version locally\n", + "# joblib.dump(reader, os.path.join(reader.output_dir, 'xlnet_qa_vGPU.joblib'))\n", + "\n", + "# # send current reader model to CPU\n", + "# reader.model.to('cpu')\n", + "# reader.device = torch.device('cpu')\n", + "\n", + "# # save CPU it locally\n", + "# joblib.dump(reader, os.path.join(reader.output_dir, 'bert_qa_vCPU.joblib'))" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file From c3d8a7a4b467a03904bfa049250dc4a0b197596c Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Wed, 31 Jul 2019 09:47:11 +0200 Subject: [PATCH 39/43] sync with latest HF changes --- cdqa/reader/hf_original_examples/run_squad.py | 26 ++++++++++++------- cdqa/reader/reader_sklearn.py | 26 ++++++++++++------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/cdqa/reader/hf_original_examples/run_squad.py b/cdqa/reader/hf_original_examples/run_squad.py index d72d67b8..7d768d2c 100644 --- a/cdqa/reader/hf_original_examples/run_squad.py +++ b/cdqa/reader/hf_original_examples/run_squad.py @@ -101,6 +101,16 @@ def train(args, train_dataset, model, tokenizer): raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.") model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level) + # multi-gpu training (should be after apex fp16 initialization) + if args.n_gpu > 1: + model = torch.nn.DataParallel(model) + + # Distributed training (should be after apex fp16 initialization) + if args.local_rank != -1: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], + output_device=args.local_rank, + find_unused_parameters=True) + # Train! 
logger.info("***** Running training *****") logger.info(" Num examples = %d", len(train_dataset)) @@ -129,8 +139,8 @@ def train(args, train_dataset, model, tokenizer): if args.model_type in ['xlnet', 'xlm']: inputs.update({'cls_index': batch[5], 'p_mask': batch[6]}) - ouputs = model(**inputs) - loss = ouputs[0] # model outputs are always tuple in pytorch-transformers (see doc) + outputs = model(**inputs) + loss = outputs[0] # model outputs are always tuple in pytorch-transformers (see doc) if args.n_gpu > 1: loss = loss.mean() # mean() to average on multi-gpu parallel (not distributed) training @@ -235,7 +245,10 @@ def evaluate(args, model, tokenizer, prefix=""): # Compute predictions output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix)) output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix)) - output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix)) + if args.version_2_with_negative: + output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix)) + else: + output_null_log_odds_file = None if args.model_type in ['xlnet', 'xlm']: # XLNet uses a more complex post-processing procedure @@ -450,14 +463,7 @@ def main(): if args.local_rank == 0: torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - # Distributed and parrallel training model.to(args.device) - if args.local_rank != -1: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], - output_device=args.local_rank, - find_unused_parameters=True) - elif args.n_gpu > 1: - model = torch.nn.DataParallel(model) logger.info("Training/evaluation parameters %s", args) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index d0252dd2..0f4e327d 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -103,6 +103,16 @@ def train(args, train_dataset, model, tokenizer): raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.") model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level) + # multi-gpu training (should be after apex fp16 initialization) + if args.n_gpu > 1: + model = torch.nn.DataParallel(model) + + # Distributed training (should be after apex fp16 initialization) + if args.local_rank != -1: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], + output_device=args.local_rank, + find_unused_parameters=True) + # Train! 
logger.info("***** Running training *****") logger.info(" Num examples = %d", len(train_dataset)) @@ -131,8 +141,8 @@ def train(args, train_dataset, model, tokenizer): if args.model_type in ['xlnet', 'xlm']: inputs.update({'cls_index': batch[5], 'p_mask': batch[6]}) - ouputs = model(**inputs) - loss = ouputs[0] # model outputs are always tuple in pytorch-transformers (see doc) + outputs = model(**inputs) + loss = outputs[0] # model outputs are always tuple in pytorch-transformers (see doc) if args.n_gpu > 1: loss = loss.mean() # mean() to average on multi-gpu parallel (not distributed) training @@ -237,7 +247,10 @@ def evaluate(input_file, args, model, tokenizer, prefix=""): # Compute predictions output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix)) output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix)) - output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix)) + if args.version_2_with_negative: + output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix)) + else: + output_null_log_odds_file = None if args.model_type in ['xlnet', 'xlm']: # XLNet uses a more complex post-processing procedure @@ -499,14 +512,7 @@ def __init__(self, if self.local_rank == 0: torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - # Distributed and parrallel training self.model.to(self.device) - if self.local_rank != -1: - self.model = torch.nn.parallel.DistributedDataParallel(self.model, device_ids=[self.local_rank], - output_device=self.local_rank, - find_unused_parameters=True) - elif self.n_gpu > 1: - self.model = torch.nn.DataParallel(self.model) logger.info("Training/evaluation parameters %s", self) From 8276470658c85467605f3133bd273c4e1f90e052 Mon Sep 17 00:00:00 2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Fri, 9 Aug 2019 16:47:51 +0200 Subject: [PATCH 40/43] sync latest HF changes --- cdqa/reader/hf_original_examples/run_squad.py | 8 +++++++- cdqa/reader/reader_sklearn.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/cdqa/reader/hf_original_examples/run_squad.py b/cdqa/reader/hf_original_examples/run_squad.py index 7d768d2c..f0ae9169 100644 --- a/cdqa/reader/hf_original_examples/run_squad.py +++ b/cdqa/reader/hf_original_examples/run_squad.py @@ -138,7 +138,7 @@ def train(args, train_dataset, model, tokenizer): 'end_positions': batch[4]} if args.model_type in ['xlnet', 'xlm']: inputs.update({'cls_index': batch[5], - 'p_mask': batch[6]}) + 'p_mask': batch[6]}) outputs = model(**inputs) loss = outputs[0] # model outputs are always tuple in pytorch-transformers (see doc) @@ -272,6 +272,9 @@ def evaluate(args, model, tokenizer, prefix=""): def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False): + if args.local_rank not in [-1, 0]: + torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache + # Load data features from cache or dataset file input_file = args.predict_file if evaluate else args.train_file cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format( @@ -296,6 +299,9 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal logger.info("Saving features into cached file %s", cached_features_file) torch.save(features, cached_features_file) + if args.local_rank == 0: 
+ torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache + # Convert to Tensors and build dataset all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index 0f4e327d..7bd02fcd 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -140,7 +140,7 @@ def train(args, train_dataset, model, tokenizer): 'end_positions': batch[4]} if args.model_type in ['xlnet', 'xlm']: inputs.update({'cls_index': batch[5], - 'p_mask': batch[6]}) + 'p_mask': batch[6]}) outputs = model(**inputs) loss = outputs[0] # model outputs are always tuple in pytorch-transformers (see doc) @@ -274,6 +274,9 @@ def evaluate(input_file, args, model, tokenizer, prefix=""): def load_and_cache_examples(input_file, args, tokenizer, evaluate=False, output_examples=False): + if args.local_rank not in [-1, 0]: + torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache + # Load data features from cache or dataset file cached_features_file = os.path.join(os.path.dirname(input_file) if isinstance(input_file, str) else '', 'cached_{}_{}_{}'.format( 'dev' if evaluate else 'train', @@ -297,6 +300,9 @@ def load_and_cache_examples(input_file, args, tokenizer, evaluate=False, output_ logger.info("Saving features into cached file %s", cached_features_file) torch.save(features, cached_features_file) + if args.local_rank == 0: + torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache + # Convert to Tensors and build dataset all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long) From 2eeb34a41e8f9bf77c8e6ee0edef6b52b4a40e83 Mon Sep 17 00:00:00 2001 From: Andre Farias Date: Sun, 25 Aug 2019 13:48:20 +0200 Subject: [PATCH 41/43] added verbose_logging option + reformatted with black --- cdqa/reader/reader_sklearn.py | 942 ++++++++++++++++++++++------------ 1 file changed, 624 insertions(+), 318 deletions(-) diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py index 7bd02fcd..90c16913 100644 --- a/cdqa/reader/reader_sklearn.py +++ b/cdqa/reader/reader_sklearn.py @@ -25,25 +25,35 @@ import numpy as np import torch -from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, - TensorDataset) +from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm, trange from tensorboardX import SummaryWriter -from pytorch_transformers import (WEIGHTS_NAME, BertConfig, - BertForQuestionAnswering, BertTokenizer, - XLMConfig, XLMForQuestionAnswering, - XLMTokenizer, XLNetConfig, - XLNetForQuestionAnswering, - XLNetTokenizer) +from pytorch_transformers import ( + WEIGHTS_NAME, + BertConfig, + BertForQuestionAnswering, + BertTokenizer, + XLMConfig, + XLMForQuestionAnswering, + XLMTokenizer, + XLNetConfig, + XLNetForQuestionAnswering, + XLNetTokenizer, +) from pytorch_transformers import AdamW, WarmupLinearSchedule -from cdqa.reader.utils_squad import (read_squad_examples, convert_examples_to_features, - RawResult, write_predictions, - RawResultExtended, 
write_predictions_extended) +from cdqa.reader.utils_squad import ( + read_squad_examples, + convert_examples_to_features, + RawResult, + write_predictions, + RawResultExtended, + write_predictions_extended, +) # The following import is the official SQuAD evaluation script (2.0). # You can remove it from the dependencies if you are using this script outside of the library @@ -54,15 +64,21 @@ logger = logging.getLogger(__name__) -ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) \ - for conf in (BertConfig, XLNetConfig, XLMConfig)), ()) +ALL_MODELS = sum( + ( + tuple(conf.pretrained_config_archive_map.keys()) + for conf in (BertConfig, XLNetConfig, XLMConfig) + ), + (), +) MODEL_CLASSES = { - 'bert': (BertConfig, BertForQuestionAnswering, BertTokenizer), - 'xlnet': (XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer), - 'xlm': (XLMConfig, XLMForQuestionAnswering, XLMTokenizer), + "bert": (BertConfig, BertForQuestionAnswering, BertTokenizer), + "xlnet": (XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer), + "xlm": (XLMConfig, XLMForQuestionAnswering, XLMTokenizer), } + def set_seed(args): random.seed(args.seed) np.random.seed(args.seed) @@ -70,38 +86,76 @@ def set_seed(args): if args.n_gpu > 0: torch.cuda.manual_seed_all(args.seed) + def to_list(tensor): return tensor.detach().cpu().tolist() -def train(args, train_dataset, model, tokenizer): + +def train(args, train_dataset, model, tokenizer, verbose_logging=False): """ Train the model """ if args.local_rank in [-1, 0]: tb_writer = SummaryWriter() args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu) - train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset) - train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size) + train_sampler = ( + RandomSampler(train_dataset) + if args.local_rank == -1 + else DistributedSampler(train_dataset) + ) + train_dataloader = DataLoader( + train_dataset, sampler=train_sampler, batch_size=args.train_batch_size + ) if args.max_steps > 0: t_total = args.max_steps - args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1 + args.num_train_epochs = ( + args.max_steps + // (len(train_dataloader) // args.gradient_accumulation_steps) + + 1 + ) else: - t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs + t_total = ( + len(train_dataloader) + // args.gradient_accumulation_steps + * args.num_train_epochs + ) # Prepare optimizer and schedule (linear warmup and decay) - no_decay = ['bias', 'LayerNorm.weight'] + no_decay = ["bias", "LayerNorm.weight"] optimizer_grouped_parameters = [ - {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay}, - {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} - ] - optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon) - scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total) + { + "params": [ + p + for n, p in model.named_parameters() + if not any(nd in n for nd in no_decay) + ], + "weight_decay": args.weight_decay, + }, + { + "params": [ + p + for n, p in model.named_parameters() + if any(nd in n for nd in no_decay) + ], + "weight_decay": 0.0, + }, + ] + optimizer = AdamW( + optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon + ) +
scheduler = WarmupLinearSchedule( + optimizer, warmup_steps=args.warmup_steps, t_total=t_total + ) if args.fp16: try: from apex import amp except ImportError: - raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.") - model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level) + raise ImportError( + "Please install apex from https://www.github.com/nvidia/apex to use fp16 training." + ) + model, optimizer = amp.initialize( + model, optimizer, opt_level=args.fp16_opt_level + ) # multi-gpu training (should be after apex fp16 initialization) if args.n_gpu > 1: @@ -109,50 +163,73 @@ def train(args, train_dataset, model, tokenizer): # Distributed training (should be after apex fp16 initialization) if args.local_rank != -1: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], - output_device=args.local_rank, - find_unused_parameters=True) + model = torch.nn.parallel.DistributedDataParallel( + model, + device_ids=[args.local_rank], + output_device=args.local_rank, + find_unused_parameters=True, + ) # Train! - logger.info("***** Running training *****") - logger.info(" Num examples = %d", len(train_dataset)) - logger.info(" Num Epochs = %d", args.num_train_epochs) - logger.info(" Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size) - logger.info(" Total train batch size (w. parallel, distributed & accumulation) = %d", - args.train_batch_size * args.gradient_accumulation_steps * (torch.distributed.get_world_size() if args.local_rank != -1 else 1)) - logger.info(" Gradient Accumulation steps = %d", args.gradient_accumulation_steps) - logger.info(" Total optimization steps = %d", t_total) + if verbose_logging: + logger.info("***** Running training *****") + logger.info(" Num examples = %d", len(train_dataset)) + logger.info(" Num Epochs = %d", args.num_train_epochs) + logger.info( + " Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size + ) + logger.info( + " Total train batch size (w. 
parallel, distributed & accumulation) = %d", + args.train_batch_size + * args.gradient_accumulation_steps + * (torch.distributed.get_world_size() if args.local_rank != -1 else 1), + ) + logger.info( + " Gradient Accumulation steps = %d", args.gradient_accumulation_steps + ) + logger.info(" Total optimization steps = %d", t_total) global_step = 0 tr_loss, logging_loss = 0.0, 0.0 model.zero_grad() - train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]) + train_iterator = trange( + int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0] + ) set_seed(args) # Added here for reproducibility (even between python 2 and 3) for _ in train_iterator: - epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0]) + epoch_iterator = tqdm( + train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0] + ) for step, batch in enumerate(epoch_iterator): model.train() batch = tuple(t.to(args.device) for t in batch) - inputs = {'input_ids': batch[0], - 'attention_mask': batch[1], - 'token_type_ids': None if args.model_type == 'xlm' else batch[2], - 'start_positions': batch[3], - 'end_positions': batch[4]} - if args.model_type in ['xlnet', 'xlm']: - inputs.update({'cls_index': batch[5], - 'p_mask': batch[6]}) + inputs = { + "input_ids": batch[0], + "attention_mask": batch[1], + "token_type_ids": None if args.model_type == "xlm" else batch[2], + "start_positions": batch[3], + "end_positions": batch[4], + } + if args.model_type in ["xlnet", "xlm"]: + inputs.update({"cls_index": batch[5], "p_mask": batch[6]}) outputs = model(**inputs) - loss = outputs[0] # model outputs are always tuple in pytorch-transformers (see doc) + loss = outputs[ + 0 + ] # model outputs are always tuple in pytorch-transformers (see doc) if args.n_gpu > 1: - loss = loss.mean() # mean() to average on multi-gpu parallel (not distributed) training + loss = ( + loss.mean() + ) # mean() to average on multi-gpu parallel (not distributed) training if args.gradient_accumulation_steps > 1: loss = loss / args.gradient_accumulation_steps if args.fp16: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() - torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm) + torch.nn.utils.clip_grad_norm_( + amp.master_params(optimizer), args.max_grad_norm + ) else: loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) @@ -164,25 +241,46 @@ def train(args, train_dataset, model, tokenizer): model.zero_grad() global_step += 1 - if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0: + if ( + args.local_rank in [-1, 0] + and args.logging_steps > 0 + and global_step % args.logging_steps == 0 + ): # Log metrics - if args.local_rank == -1 and args.evaluate_during_training: # Only evaluate when single GPU otherwise metrics may not average well + if ( + args.local_rank == -1 and args.evaluate_during_training + ): # Only evaluate when single GPU otherwise metrics may not average well results = evaluate(args, model, tokenizer) for key, value in results.items(): - tb_writer.add_scalar('eval_{}'.format(key), value, global_step) - tb_writer.add_scalar('lr', scheduler.get_lr()[0], global_step) - tb_writer.add_scalar('loss', (tr_loss - logging_loss)/args.logging_steps, global_step) + tb_writer.add_scalar( + "eval_{}".format(key), value, global_step + ) + tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step) +
tb_writer.add_scalar( + "loss", + (tr_loss - logging_loss) / args.logging_steps, + global_step, + ) logging_loss = tr_loss - if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0: + if ( + args.local_rank in [-1, 0] + and args.save_steps > 0 + and global_step % args.save_steps == 0 + ): # Save model checkpoint - output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step)) + output_dir = os.path.join( + args.output_dir, "checkpoint-{}".format(global_step) + ) if not os.path.exists(output_dir): os.makedirs(output_dir) - model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training + model_to_save = ( + model.module if hasattr(model, "module") else model + ) # Take care of distributed/parallel training model_to_save.save_pretrained(output_dir) - torch.save(args, os.path.join(output_dir, 'training_args.bin')) - logger.info("Saving model checkpoint to %s", output_dir) + torch.save(args, os.path.join(output_dir, "training_args.bin")) + if verbose_logging: + logger.info("Saving model checkpoint to %s", output_dir) if args.max_steps > 0 and global_step > args.max_steps: epoch_iterator.close() @@ -197,105 +295,175 @@ def train(args, train_dataset, model, tokenizer): return global_step, tr_loss / global_step -def evaluate(input_file, args, model, tokenizer, prefix=""): - dataset, examples, features = load_and_cache_examples(input_file, args, tokenizer, evaluate=True, output_examples=True) +def evaluate(input_file, args, model, tokenizer, prefix="", verbose_logging=False): + dataset, examples, features = load_and_cache_examples( + input_file, + args, + tokenizer, + evaluate=True, + output_examples=True, + verbose_logging=verbose_logging, + ) if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: os.makedirs(args.output_dir) args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu) # Note that DistributedSampler samples randomly - eval_sampler = SequentialSampler(dataset) if args.local_rank == -1 else DistributedSampler(dataset) - eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) + eval_sampler = ( + SequentialSampler(dataset) + if args.local_rank == -1 + else DistributedSampler(dataset) + ) + eval_dataloader = DataLoader( + dataset, sampler=eval_sampler, batch_size=args.eval_batch_size + ) # Eval! 
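The checkpointing block above unwraps DataParallel or DistributedDataParallel before saving, because those wrappers hold the real model under `.module`. A sketch of the save-and-reload round trip (the model name and checkpoint directory are illustrative, and pytorch-transformers is assumed to be installed):

    import os
    from pytorch_transformers import BertForQuestionAnswering

    model = BertForQuestionAnswering.from_pretrained("bert-base-uncased")

    output_dir = "checkpoint-50"  # hypothetical checkpoint directory
    os.makedirs(output_dir, exist_ok=True)  # save_pretrained expects an existing directory

    model_to_save = model.module if hasattr(model, "module") else model  # unwrap if wrapped
    model_to_save.save_pretrained(output_dir)  # writes pytorch_model.bin and config.json

    reloaded = BertForQuestionAnswering.from_pretrained(output_dir)  # ready for evaluation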
- logger.info("***** Running evaluation {} *****".format(prefix)) - logger.info(" Num examples = %d", len(dataset)) - logger.info(" Batch size = %d", args.eval_batch_size) + if verbose_logging: + logger.info("***** Running evaluation {} *****".format(prefix)) + logger.info(" Num examples = %d", len(dataset)) + logger.info(" Batch size = %d", args.eval_batch_size) all_results = [] for batch in tqdm(eval_dataloader, desc="Evaluating"): model.eval() batch = tuple(t.to(args.device) for t in batch) with torch.no_grad(): - inputs = {'input_ids': batch[0], - 'attention_mask': batch[1], - 'token_type_ids': None if args.model_type == 'xlm' else batch[2] # XLM don't use segment_ids - } + inputs = { + "input_ids": batch[0], + "attention_mask": batch[1], + "token_type_ids": None + if args.model_type == "xlm" + else batch[2], # XLM don't use segment_ids + } example_indices = batch[3] - if args.model_type in ['xlnet', 'xlm']: - inputs.update({'cls_index': batch[4], - 'p_mask': batch[5]}) + if args.model_type in ["xlnet", "xlm"]: + inputs.update({"cls_index": batch[4], "p_mask": batch[5]}) outputs = model(**inputs) for i, example_index in enumerate(example_indices): eval_feature = features[example_index.item()] unique_id = int(eval_feature.unique_id) - if args.model_type in ['xlnet', 'xlm']: + if args.model_type in ["xlnet", "xlm"]: # XLNet uses a more complex post-processing procedure - result = RawResultExtended(unique_id = unique_id, - start_top_log_probs = to_list(outputs[0][i]), - start_top_index = to_list(outputs[1][i]), - end_top_log_probs = to_list(outputs[2][i]), - end_top_index = to_list(outputs[3][i]), - cls_logits = to_list(outputs[4][i])) + result = RawResultExtended( + unique_id=unique_id, + start_top_log_probs=to_list(outputs[0][i]), + start_top_index=to_list(outputs[1][i]), + end_top_log_probs=to_list(outputs[2][i]), + end_top_index=to_list(outputs[3][i]), + cls_logits=to_list(outputs[4][i]), + ) else: - result = RawResult(unique_id = unique_id, - start_logits = to_list(outputs[0][i]), - end_logits = to_list(outputs[1][i])) + result = RawResult( + unique_id=unique_id, + start_logits=to_list(outputs[0][i]), + end_logits=to_list(outputs[1][i]), + ) all_results.append(result) - + # Compute predictions - output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix)) - output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix)) + output_prediction_file = os.path.join( + args.output_dir, "predictions_{}.json".format(prefix) + ) + output_nbest_file = os.path.join( + args.output_dir, "nbest_predictions_{}.json".format(prefix) + ) if args.version_2_with_negative: - output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix)) + output_null_log_odds_file = os.path.join( + args.output_dir, "null_odds_{}.json".format(prefix) + ) else: output_null_log_odds_file = None - if args.model_type in ['xlnet', 'xlm']: + if args.model_type in ["xlnet", "xlm"]: # XLNet uses a more complex post-processing procedure - write_predictions_extended(examples, features, all_results, args.n_best_size, - args.max_answer_length, output_prediction_file, - output_nbest_file, output_null_log_odds_file, input_file, - model.config.start_n_top, model.config.end_n_top, - args.version_2_with_negative, tokenizer, args.verbose_logging) + write_predictions_extended( + examples, + features, + all_results, + args.n_best_size, + args.max_answer_length, + output_prediction_file, + output_nbest_file, + output_null_log_odds_file, + input_file, + 
model.config.start_n_top, + model.config.end_n_top, + args.version_2_with_negative, + tokenizer, + args.verbose_logging, + ) else: - write_predictions(examples, features, all_results, args.n_best_size, - args.max_answer_length, args.do_lower_case, output_prediction_file, - output_nbest_file, output_null_log_odds_file, args.verbose_logging, - args.version_2_with_negative, args.null_score_diff_threshold) + write_predictions( + examples, + features, + all_results, + args.n_best_size, + args.max_answer_length, + args.do_lower_case, + output_prediction_file, + output_nbest_file, + output_null_log_odds_file, + args.verbose_logging, + args.version_2_with_negative, + args.null_score_diff_threshold, + ) # Evaluate with the official SQuAD script - evaluate_options = EVAL_OPTS(data_file=input_file, - pred_file=output_prediction_file, - na_prob_file=output_null_log_odds_file) + evaluate_options = EVAL_OPTS( + data_file=input_file, + pred_file=output_prediction_file, + na_prob_file=output_null_log_odds_file, + ) results = evaluate_on_squad(evaluate_options) return results -def load_and_cache_examples(input_file, args, tokenizer, evaluate=False, output_examples=False): +def load_and_cache_examples( + input_file, + args, + tokenizer, + evaluate=False, + output_examples=False, + verbose_logging=False, +): if args.local_rank not in [-1, 0]: torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache # Load data features from cache or dataset file - cached_features_file = os.path.join(os.path.dirname(input_file) if isinstance(input_file, str) else '', 'cached_{}_{}_{}'.format( - 'dev' if evaluate else 'train', - list(filter(None, args.model_name_or_path.split('/'))).pop(), - str(args.max_seq_length))) - if os.path.exists(cached_features_file) and not args.overwrite_cache and not output_examples: - logger.info("Loading features from cached file %s", cached_features_file) + cached_features_file = os.path.join( + os.path.dirname(input_file) if isinstance(input_file, str) else "", + "cached_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, args.model_name_or_path.split("/"))).pop(), + str(args.max_seq_length), + ), + ) + if ( + os.path.exists(cached_features_file) + and not args.overwrite_cache + and not output_examples + ): + if verbose_logging: + logger.info("Loading features from cached file %s", cached_features_file) features = torch.load(cached_features_file) else: - logger.info("Creating features from dataset file at %s", input_file) - examples = read_squad_examples(input_file=input_file, - is_training=not evaluate, - version_2_with_negative=args.version_2_with_negative) - features = convert_examples_to_features(examples=examples, - tokenizer=tokenizer, - max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, - max_query_length=args.max_query_length, - is_training=not evaluate) + if verbose_logging: + logger.info("Creating features from dataset file at %s", input_file) + examples = read_squad_examples( + input_file=input_file, + is_training=not evaluate, + version_2_with_negative=args.version_2_with_negative, + ) + features = convert_examples_to_features( + examples=examples, + tokenizer=tokenizer, + max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, + max_query_length=args.max_query_length, + is_training=not evaluate, + ) if args.local_rank in [-1, 0]: logger.info("Saving features into cached file %s", cached_features_file) torch.save(features, cached_features_file) @@ 
-311,247 +479,358 @@ def load_and_cache_examples(input_file, args, tokenizer, evaluate=False, output_ all_p_mask = torch.tensor([f.p_mask for f in features], dtype=torch.float) if evaluate: all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long) - dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, - all_example_index, all_cls_index, all_p_mask) + dataset = TensorDataset( + all_input_ids, + all_input_mask, + all_segment_ids, + all_example_index, + all_cls_index, + all_p_mask, + ) else: - all_start_positions = torch.tensor([f.start_position for f in features], dtype=torch.long) - all_end_positions = torch.tensor([f.end_position for f in features], dtype=torch.long) - dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, - all_start_positions, all_end_positions, - all_cls_index, all_p_mask) + all_start_positions = torch.tensor( + [f.start_position for f in features], dtype=torch.long + ) + all_end_positions = torch.tensor( + [f.end_position for f in features], dtype=torch.long + ) + dataset = TensorDataset( + all_input_ids, + all_input_mask, + all_segment_ids, + all_start_positions, + all_end_positions, + all_cls_index, + all_p_mask, + ) if output_examples: return dataset, examples, features return dataset -def predict(input_file, args, model, tokenizer, prefix=""): - dataset, examples, features = load_and_cache_examples(input_file, args, tokenizer, evaluate=True, output_examples=True) +def predict(input_file, args, model, tokenizer, prefix="", verbose_logging=False): + dataset, examples, features = load_and_cache_examples( + input_file, + args, + tokenizer, + evaluate=True, + output_examples=True, + verbose_logging=verbose_logging, + ) if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: os.makedirs(args.output_dir) args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu) # Note that DistributedSampler samples randomly - eval_sampler = SequentialSampler(dataset) if args.local_rank == -1 else DistributedSampler(dataset) - eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) + eval_sampler = ( + SequentialSampler(dataset) + if args.local_rank == -1 + else DistributedSampler(dataset) + ) + eval_dataloader = DataLoader( + dataset, sampler=eval_sampler, batch_size=args.eval_batch_size + ) # Eval! 
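Every `batch[i]` index used in the evaluation loop that follows is fixed by the column order of the `TensorDataset` assembled above. A runnable toy sketch of that mechanism (shapes and batch size are made up):

    import torch
    from torch.utils.data import DataLoader, SequentialSampler, TensorDataset

    input_ids = torch.zeros(8, 384, dtype=torch.long)  # toy token ids, max_seq_length 384
    input_mask = torch.ones(8, 384, dtype=torch.long)  # toy attention mask
    example_index = torch.arange(8, dtype=torch.long)  # maps each feature back to its example

    dataset = TensorDataset(input_ids, input_mask, example_index)
    loader = DataLoader(dataset, sampler=SequentialSampler(dataset), batch_size=4)
    for ids, mask, idx in loader:
        pass  # tensors come back in exactly the order they were passed to TensorDataset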
- logger.info("***** Running evaluation {} *****".format(prefix)) - logger.info(" Num examples = %d", len(dataset)) - logger.info(" Batch size = %d", args.eval_batch_size) + if verbose_logging: + logger.info("***** Running evaluation {} *****".format(prefix)) + logger.info(" Num examples = %d", len(dataset)) + logger.info(" Batch size = %d", args.eval_batch_size) all_results = [] for batch in tqdm(eval_dataloader, desc="Evaluating"): model.eval() batch = tuple(t.to(args.device) for t in batch) with torch.no_grad(): - inputs = {'input_ids': batch[0], - 'token_type_ids': None if args.model_type == 'xlm' else batch[1], # XLM don't use segment_ids - 'attention_mask': batch[2]} + inputs = { + "input_ids": batch[0], + "token_type_ids": None + if args.model_type == "xlm" + else batch[1], # XLM don't use segment_ids + "attention_mask": batch[2], + } example_indices = batch[3] - if args.model_type in ['xlnet', 'xlm']: - inputs.update({'cls_index': batch[4], - 'p_mask': batch[5]}) + if args.model_type in ["xlnet", "xlm"]: + inputs.update({"cls_index": batch[4], "p_mask": batch[5]}) outputs = model(**inputs) for i, example_index in enumerate(example_indices): eval_feature = features[example_index.item()] unique_id = int(eval_feature.unique_id) - if args.model_type in ['xlnet', 'xlm']: + if args.model_type in ["xlnet", "xlm"]: # XLNet uses a more complex post-processing procedure - result = RawResultExtended(unique_id = unique_id, - start_top_log_probs = to_list(outputs[0][i]), - start_top_index = to_list(outputs[1][i]), - end_top_log_probs = to_list(outputs[2][i]), - end_top_index = to_list(outputs[3][i]), - cls_logits = to_list(outputs[4][i])) + result = RawResultExtended( + unique_id=unique_id, + start_top_log_probs=to_list(outputs[0][i]), + start_top_index=to_list(outputs[1][i]), + end_top_log_probs=to_list(outputs[2][i]), + end_top_index=to_list(outputs[3][i]), + cls_logits=to_list(outputs[4][i]), + ) else: - result = RawResult(unique_id = unique_id, - start_logits = to_list(outputs[0][i]), - end_logits = to_list(outputs[1][i])) + result = RawResult( + unique_id=unique_id, + start_logits=to_list(outputs[0][i]), + end_logits=to_list(outputs[1][i]), + ) all_results.append(result) - - # Compute predictions - output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix)) - output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(prefix)) - output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(prefix)) - if args.model_type in ['xlnet', 'xlm']: + # Compute predictions + output_prediction_file = os.path.join( + args.output_dir, "predictions_{}.json".format(prefix) + ) + output_nbest_file = os.path.join( + args.output_dir, "nbest_predictions_{}.json".format(prefix) + ) + output_null_log_odds_file = os.path.join( + args.output_dir, "null_odds_{}.json".format(prefix) + ) + + if args.model_type in ["xlnet", "xlm"]: # XLNet uses a more complex post-processing procedure - out_eval, final_prediction = write_predictions_extended(examples, features, all_results, args.n_best_size, - args.max_answer_length, output_prediction_file, - output_nbest_file, output_null_log_odds_file, input_file, - model.config.start_n_top, model.config.end_n_top, - args.version_2_with_negative, tokenizer, args.verbose_logging) + out_eval, final_prediction = write_predictions_extended( + examples, + features, + all_results, + args.n_best_size, + args.max_answer_length, + output_prediction_file, + output_nbest_file, + output_null_log_odds_file, + 
input_file, + model.config.start_n_top, + model.config.end_n_top, + args.version_2_with_negative, + tokenizer, + args.verbose_logging, + ) else: - write_predictions(examples, features, all_results, args.n_best_size, - args.max_answer_length, args.do_lower_case, output_prediction_file, - output_nbest_file, output_null_log_odds_file, args.verbose_logging, - args.version_2_with_negative, args.null_score_diff_threshold) + write_predictions( + examples, + features, + all_results, + args.n_best_size, + args.max_answer_length, + args.do_lower_case, + output_prediction_file, + output_nbest_file, + output_null_log_odds_file, + args.verbose_logging, + args.version_2_with_negative, + args.null_score_diff_threshold, + ) return out_eval, final_prediction + class Reader(BaseEstimator): """ """ - def __init__(self, - model_type=None, - model_name_or_path=None, - output_dir=None, - config_name="", - tokenizer_name="", - cache_dir="", - version_2_with_negative=True, - null_score_diff_threshold=0.0, - max_seq_length=384, - doc_stride=128, - max_query_length=64, - evaluate_during_training=True, - do_lower_case=True, - per_gpu_train_batch_size=8, - per_gpu_eval_batch_size=8, - learning_rate=5e-5, - gradient_accumulation_steps=1, - weight_decay=0.0, - adam_epsilon=1e-8, - max_grad_norm=1.0, - num_train_epochs=3.0, - max_steps=-1, - warmup_steps=0, - n_best_size=20, - max_answer_length=30, - verbose_logging=True, - logging_steps=50, - save_steps=50, - eval_all_checkpoints=True, - no_cuda=True, - overwrite_output_dir=True, - overwrite_cache=True, - seed=42, - local_rank=-1, - fp16=True, - fp16_opt_level='O1', - server_ip='', - server_port='', - pretrained_model_path=None): - - self.model_type = model_type - self.model_name_or_path = model_name_or_path - self.output_dir = output_dir - self.config_name = config_name - self.tokenizer_name = tokenizer_name - self.cache_dir = cache_dir - self.version_2_with_negative = version_2_with_negative - self.null_score_diff_threshold = null_score_diff_threshold - self.max_seq_length = max_seq_length - self.doc_stride = doc_stride - self.max_query_length = max_query_length - self.evaluate_during_training = evaluate_during_training - self.do_lower_case = do_lower_case - self.per_gpu_train_batch_size = per_gpu_train_batch_size - self.per_gpu_eval_batch_size = per_gpu_eval_batch_size - self.learning_rate = learning_rate - self.gradient_accumulation_steps = gradient_accumulation_steps - self.weight_decay = weight_decay - self.adam_epsilon = adam_epsilon - self.max_grad_norm = max_grad_norm - self.num_train_epochs = num_train_epochs - self.max_steps = max_steps - self.warmup_steps = warmup_steps - self.n_best_size = n_best_size - self.max_answer_length = max_answer_length - self.verbose_logging = verbose_logging - self.logging_steps = logging_steps - self.save_steps = save_steps - self.eval_all_checkpoints = eval_all_checkpoints - self.no_cuda = no_cuda - self.overwrite_output_dir = overwrite_output_dir - self.overwrite_cache = overwrite_cache - self.seed = seed - self.local_rank = local_rank - self.fp16 = fp16 - self.fp16_opt_level = fp16_opt_level - self.server_ip = server_ip - self.server_port = server_port - self.pretrained_model_path = pretrained_model_path - - # Setup distant debugging if needed - if self.server_ip and self.server_port: - # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script - import ptvsd - print("Waiting for debugger attach") - ptvsd.enable_attach(address=(self.server_ip, self.server_port), 
redirect_output=True) - ptvsd.wait_for_attach() - - # Setup CUDA, GPU & distributed training - if self.local_rank == -1 or self.no_cuda: - device = torch.device("cuda" if torch.cuda.is_available() and not self.no_cuda else "cpu") - self.n_gpu = torch.cuda.device_count() - else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs - torch.cuda.set_device(self.local_rank) - device = torch.device("cuda", self.local_rank) - torch.distributed.init_process_group(backend='nccl') - self.n_gpu = 1 - self.device = device - - # Setup logging - logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.INFO if self.local_rank in [-1, 0] else logging.WARN) - logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", - self.local_rank, device, self.n_gpu, bool(self.local_rank != -1), self.fp16) - - # Set seed - set_seed(self) - - # Load pretrained model and tokenizer - if self.local_rank not in [-1, 0]: - torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - - self.model_type = self.model_type.lower() - config_class, self.model_class, tokenizer_class = MODEL_CLASSES[self.model_type] - config = config_class.from_pretrained(self.config_name if self.config_name else self.model_name_or_path) - self.tokenizer = tokenizer_class.from_pretrained(self.tokenizer_name if self.tokenizer_name else self.model_name_or_path, do_lower_case=self.do_lower_case) - self.model = self.model_class.from_pretrained(self.model_name_or_path, from_tf=bool('.ckpt' in self.model_name_or_path), config=config) - - if self.local_rank == 0: - torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - - self.model.to(self.device) - + def __init__( + self, + model_type=None, + model_name_or_path=None, + output_dir=None, + config_name="", + tokenizer_name="", + cache_dir="", + version_2_with_negative=True, + null_score_diff_threshold=0.0, + max_seq_length=384, + doc_stride=128, + max_query_length=64, + evaluate_during_training=True, + do_lower_case=True, + per_gpu_train_batch_size=8, + per_gpu_eval_batch_size=8, + learning_rate=5e-5, + gradient_accumulation_steps=1, + weight_decay=0.0, + adam_epsilon=1e-8, + max_grad_norm=1.0, + num_train_epochs=3.0, + max_steps=-1, + warmup_steps=0, + n_best_size=20, + max_answer_length=30, + verbose_logging=False, + logging_steps=50, + save_steps=50, + eval_all_checkpoints=True, + no_cuda=True, + overwrite_output_dir=True, + overwrite_cache=True, + seed=42, + local_rank=-1, + fp16=True, + fp16_opt_level="O1", + server_ip="", + server_port="", + pretrained_model_path=None, + ): + + self.model_type = model_type + self.model_name_or_path = model_name_or_path + self.output_dir = output_dir + self.config_name = config_name + self.tokenizer_name = tokenizer_name + self.cache_dir = cache_dir + self.version_2_with_negative = version_2_with_negative + self.null_score_diff_threshold = null_score_diff_threshold + self.max_seq_length = max_seq_length + self.doc_stride = doc_stride + self.max_query_length = max_query_length + self.evaluate_during_training = evaluate_during_training + self.do_lower_case = do_lower_case + self.per_gpu_train_batch_size = per_gpu_train_batch_size + self.per_gpu_eval_batch_size = per_gpu_eval_batch_size + self.learning_rate = learning_rate + self.gradient_accumulation_steps = gradient_accumulation_steps + 
self.weight_decay = weight_decay + self.adam_epsilon = adam_epsilon + self.max_grad_norm = max_grad_norm + self.num_train_epochs = num_train_epochs + self.max_steps = max_steps + self.warmup_steps = warmup_steps + self.n_best_size = n_best_size + self.max_answer_length = max_answer_length + self.verbose_logging = verbose_logging + self.logging_steps = logging_steps + self.save_steps = save_steps + self.eval_all_checkpoints = eval_all_checkpoints + self.no_cuda = no_cuda + self.overwrite_output_dir = overwrite_output_dir + self.overwrite_cache = overwrite_cache + self.seed = seed + self.local_rank = local_rank + self.fp16 = fp16 + self.fp16_opt_level = fp16_opt_level + self.server_ip = server_ip + self.server_port = server_port + self.pretrained_model_path = pretrained_model_path + + # Setup distant debugging if needed + if self.server_ip and self.server_port: + # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script + import ptvsd + + print("Waiting for debugger attach") + ptvsd.enable_attach( + address=(self.server_ip, self.server_port), redirect_output=True + ) + ptvsd.wait_for_attach() + + # Setup CUDA, GPU & distributed training + if self.local_rank == -1 or self.no_cuda: + device = torch.device( + "cuda" if torch.cuda.is_available() and not self.no_cuda else "cpu" + ) + self.n_gpu = torch.cuda.device_count() + else: # Initializes the distributed backend which will take care of synchronizing nodes/GPUs + torch.cuda.set_device(self.local_rank) + device = torch.device("cuda", self.local_rank) + torch.distributed.init_process_group(backend="nccl") + self.n_gpu = 1 + self.device = device + + # Setup logging + if self.verbose_logging: + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + level=logging.INFO if self.local_rank in [-1, 0] else logging.WARN, + ) + logger.warning( + "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", + self.local_rank, + device, + self.n_gpu, + bool(self.local_rank != -1), + self.fp16, + ) + + # Set seed + set_seed(self) + + # Load pretrained model and tokenizer + if self.local_rank not in [-1, 0]: + torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab + + self.model_type = self.model_type.lower() + config_class, self.model_class, tokenizer_class = MODEL_CLASSES[self.model_type] + config = config_class.from_pretrained( + self.config_name if self.config_name else self.model_name_or_path + ) + self.tokenizer = tokenizer_class.from_pretrained( + self.tokenizer_name if self.tokenizer_name else self.model_name_or_path, + do_lower_case=self.do_lower_case, + ) + self.model = self.model_class.from_pretrained( + self.model_name_or_path, + from_tf=bool(".ckpt" in self.model_name_or_path), + config=config, + ) + + if self.local_rank == 0: + torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab + + self.model.to(self.device) + if self.verbose_logging: logger.info("Training/evaluation parameters %s", self) - if self.pretrained_model_path: - # Load a trained model and vocabulary that you have fine-tuned - self.model = self.model_class.from_pretrained(self.pretrained_model_path) - # self.tokenizer = tokenizer_class.from_pretrained(self.pretrained_model_path) - self.model.to(self.device) + if self.pretrained_model_path: + # Load a trained model and vocabulary that you have fine-tuned + self.model =
 
     def fit(self, X, y=None):
-        if os.path.exists(self.output_dir) and os.listdir(self.output_dir) and not self.overwrite_output_dir:
-            raise ValueError("Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(self.output_dir))
-
-        train_dataset = load_and_cache_examples(input_file=X, args=self, tokenizer=self.tokenizer, evaluate=False, output_examples=False)
-        global_step, tr_loss = train(self, train_dataset, self.model, self.tokenizer)
-        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)
+        if (
+            os.path.exists(self.output_dir)
+            and os.listdir(self.output_dir)
+            and not self.overwrite_output_dir
+        ):
+            raise ValueError(
+                "Output directory ({}) already exists and is not empty. Set overwrite_output_dir=True to override.".format(
+                    self.output_dir
+                )
+            )
+
+        train_dataset = load_and_cache_examples(
+            input_file=X,
+            args=self,
+            tokenizer=self.tokenizer,
+            evaluate=False,
+            output_examples=False,
+        )
+        global_step, tr_loss = train(
+            self, train_dataset, self.model, self.tokenizer, self.verbose_logging
+        )
+        if self.verbose_logging:
+            logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)
 
         # Save the trained model and the tokenizer
         if self.local_rank == -1 or torch.distributed.get_rank() == 0:
             # Create output directory if needed
             if not os.path.exists(self.output_dir) and self.local_rank in [-1, 0]:
                 os.makedirs(self.output_dir)
-
-            logger.info("Saving model checkpoint to %s", self.output_dir)
+            if self.verbose_logging:
+                logger.info("Saving model checkpoint to %s", self.output_dir)
 
             # Save a trained model, configuration and tokenizer using `save_pretrained()`.
             # They can then be reloaded using `from_pretrained()`
-            model_to_save = self.model.module if hasattr(self.model, 'module') else self.model  # Take care of distributed/parallel training
+            model_to_save = (
+                self.model.module if hasattr(self.model, "module") else self.model
+            )  # Take care of distributed/parallel training
             model_to_save.save_pretrained(self.output_dir)
             self.tokenizer.save_pretrained(self.output_dir)
 
             # Good practice: save your training arguments together with the trained model
-            torch.save(self.get_params(), os.path.join(self.output_dir, 'training_args.bin'))
+            torch.save(
+                self.get_params(), os.path.join(self.output_dir, "training_args.bin")
+            )
 
         return self
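+
+    # What fit() writes can be reloaded later; a sketch, assuming a reader built
+    # as above (illustrative only, not code from the original patch):
+    #
+    #     reader.model = reader.model_class.from_pretrained(reader.output_dir)
+    #     params = torch.load(os.path.join(reader.output_dir, "training_args.bin"))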
@@ -562,29 +841,56 @@ def evaluate(self, X):
         if self.local_rank in [-1, 0]:
             checkpoints = [self.output_dir]
             if self.eval_all_checkpoints:
-                checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(self.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
-                logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN)  # Reduce model loading logs
-
-        logger.info("Evaluate the following checkpoints: %s", checkpoints)
-
+                checkpoints = list(
+                    os.path.dirname(c)
+                    for c in sorted(
+                        glob.glob(
+                            self.output_dir + "/**/" + WEIGHTS_NAME, recursive=True
+                        )
+                    )
+                )
+                if self.verbose_logging:
+                    logging.getLogger("pytorch_transformers.modeling_utils").setLevel(
+                        logging.WARN
+                    )  # Reduce model loading logs
+        if self.verbose_logging:
+            logger.info("Evaluate the following checkpoints: %s", checkpoints)
+
         for checkpoint in checkpoints:
             # Reload the model
-            global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
+            global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else ""
             self.model = self.model_class.from_pretrained(checkpoint)
             self.model.to(self.device)
 
             # Evaluate
-            result = evaluate(input_file=X, args=self, model=self.model, tokenizer=self.tokenizer, prefix=global_step)
-
-            result = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in result.items())
+            result = evaluate(
+                input_file=X,
+                args=self,
+                model=self.model,
+                tokenizer=self.tokenizer,
+                prefix=global_step,
+                verbose_logging=self.verbose_logging,
+            )
+
+            result = dict(
+                (k + ("_{}".format(global_step) if global_step else ""), v)
+                for k, v in result.items()
+            )
             results.update(result)
-
-        logger.info("Results: {}".format(results))
+        if self.verbose_logging:
+            logger.info("Results: {}".format(results))
 
         return results
 
     def predict(self, X):
-        out_eval, final_prediction = predict(input_file=X, args=self, model=self.model, tokenizer=self.tokenizer, prefix="")
+        out_eval, final_prediction = predict(
+            input_file=X,
+            args=self,
+            model=self.model,
+            tokenizer=self.tokenizer,
+            prefix="",
+            verbose_logging=self.verbose_logging,
+        )
 
         return out_eval, final_prediction

From eab6401b7885b3e8d8875c3ce83f4ac39d73ca4d Mon Sep 17 00:00:00 2001
From: Andre Farias
Date: Sun, 25 Aug 2019 14:01:32 +0200
Subject: [PATCH 42/43] added last verbose conditions

---
 cdqa/reader/reader_sklearn.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cdqa/reader/reader_sklearn.py b/cdqa/reader/reader_sklearn.py
index 90c16913..7e4eedbf 100644
--- a/cdqa/reader/reader_sklearn.py
+++ b/cdqa/reader/reader_sklearn.py
@@ -805,6 +805,7 @@ def fit(self, X, y=None):
             tokenizer=self.tokenizer,
             evaluate=False,
             output_examples=False,
+            verbose_logging=self.verbose_logging
         )
         global_step, tr_loss = train(
             self, train_dataset, self.model, self.tokenizer, self.verbose_logging
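For reference, the sklearn-style entry points patched above chain together like this (a sketch only; it assumes a Reader built as sketched earlier and the SQuAD 2.0 files used throughout this series):

    reader.fit(X='train-v2.0.json')
    results = reader.evaluate(X='dev-v2.0.json')
    out_eval, final_prediction = reader.predict(X='dev-v2.0.json')

From 660760c8c429138a8e48470fb7a220947e6d0a4b Mon Sep 17 00:00:00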
2001 From: fmikaelian <39884124+fmikaelian@users.noreply.github.com> Date: Sat, 7 Sep 2019 15:27:53 +0200 Subject: [PATCH 43/43] add colab notebook for xlnet eval on squad 2.0 --- examples/tutorial-eval-xlnet-squad2.0.ipynb | 283 ++++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 examples/tutorial-eval-xlnet-squad2.0.ipynb diff --git a/examples/tutorial-eval-xlnet-squad2.0.ipynb b/examples/tutorial-eval-xlnet-squad2.0.ipynb new file mode 100644 index 00000000..67085c7b --- /dev/null +++ b/examples/tutorial-eval-xlnet-squad2.0.ipynb @@ -0,0 +1,283 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "tutorial-predict-pipeline.ipynb", + "version": "0.3.2", + "provenance": [] + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "zNtCqwveFjcK", + "colab_type": "code", + "outputId": "6a94d325-b50a-4874-a999-59702327dcbe", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 151 + } + }, + "source": [ + "!git clone https://github.com/cdqa-suite/cdQA.git" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'cdQA'...\n", + "remote: Enumerating objects: 61, done.\u001b[K\n", + "remote: Counting objects: 100% (61/61), done.\u001b[K\n", + "remote: Compressing objects: 100% (49/49), done.\u001b[K\n", + "remote: Total 1138 (delta 28), reused 35 (delta 12), pack-reused 1077\u001b[K\n", + "Receiving objects: 100% (1138/1138), 441.88 KiB | 1.10 MiB/s, done.\n", + "Resolving deltas: 100% (686/686), done.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "v2XvXm4bFp7h", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "cwd = os.getcwd()\n", + "os.chdir(\"cdQA\")" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "5jBtSKczGF38", + "colab_type": "code", + "outputId": "d657fe20-985d-4fc8-b435-794e17f77748", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 55 + } + }, + "source": [ + "!git checkout sync-huggingface" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Branch 'sync-huggingface' set up to track remote branch 'sync-huggingface' from 'origin'.\n", + "Switched to a new branch 'sync-huggingface'\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DHl2HUX1GRd6", + "colab_type": "code", + "outputId": "625ba318-f7e5-4f24-98a5-cb7cbacbf175", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 170 + } + }, + "source": [ + "!pip install -q -e ." + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\u001b[K |████████████████████████████████| 133kB 4.2MB/s \n", + "\u001b[K |████████████████████████████████| 163kB 43.6MB/s \n", + "\u001b[K |████████████████████████████████| 225kB 45.6MB/s \n", + "\u001b[K |████████████████████████████████| 655kB 35.1MB/s \n", + "\u001b[K |████████████████████████████████| 1.0MB 37.8MB/s \n", + "\u001b[?25h Building wheel for tika (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for wget (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Building wheel for regex (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_NWD3P6qH_8_", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import wget\n", + "\n", + "squad_urls = [\n", + " 'https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json',\n", + " 'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json',\n", + " 'https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json',\n", + " 'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json'\n", + "]\n", + "\n", + "for squad_url in squad_urls:\n", + " wget.download(url=squad_url, out='.')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ylorIsqLz_J3", + "colab_type": "code", + "outputId": "e6efed6f-551a-41da-9f18-62ed0417830d", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 649 + } + }, + "source": [ + "!wget https://github.com/cdqa-suite/cdQA/releases/download/XLNet_cased_vCPU/pytorch_model.bin\n", + "!wget https://github.com/cdqa-suite/cdQA/releases/download/XLNet_cased_vCPU/config.json" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "text": [ + "--2019-09-01 16:22:00-- https://github.com/cdqa-suite/cdQA/releases/download/XLNet_cased_vCPU/pytorch_model.bin\n", + "Resolving github.com (github.com)... 192.30.253.113\n", + "Connecting to github.com (github.com)|192.30.253.113|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://github-production-release-asset-2e65be.s3.amazonaws.com/165645094/96b5db80-aa35-11e9-8147-fbf9e537f61c?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20190901%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20190901T162200Z&X-Amz-Expires=300&X-Amz-Signature=3137e708a0e6d08e1ae399eb69fcd41e2f44a7464aa35fdb2d0643b8f5e2b628&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dpytorch_model.bin&response-content-type=application%2Foctet-stream [following]\n", + "--2019-09-01 16:22:00-- https://github-production-release-asset-2e65be.s3.amazonaws.com/165645094/96b5db80-aa35-11e9-8147-fbf9e537f61c?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20190901%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20190901T162200Z&X-Amz-Expires=300&X-Amz-Signature=3137e708a0e6d08e1ae399eb69fcd41e2f44a7464aa35fdb2d0643b8f5e2b628&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dpytorch_model.bin&response-content-type=application%2Foctet-stream\n", + "Resolving github-production-release-asset-2e65be.s3.amazonaws.com (github-production-release-asset-2e65be.s3.amazonaws.com)... 52.217.38.20\n", + "Connecting to github-production-release-asset-2e65be.s3.amazonaws.com (github-production-release-asset-2e65be.s3.amazonaws.com)|52.217.38.20|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 476375014 (454M) [application/octet-stream]\n", + "Saving to: ‘pytorch_model.bin’\n", + "\n", + "pytorch_model.bin 100%[===================>] 454.31M 16.5MB/s in 30s \n", + "\n", + "2019-09-01 16:22:31 (15.4 MB/s) - ‘pytorch_model.bin’ saved [476375014/476375014]\n", + "\n", + "--2019-09-01 16:22:33-- https://github.com/cdqa-suite/cdQA/releases/download/XLNet_cased_vCPU/config.json\n", + "Resolving github.com (github.com)... 
192.30.253.113\n", + "Connecting to github.com (github.com)|192.30.253.113|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://github-production-release-asset-2e65be.s3.amazonaws.com/165645094/96b5db80-aa35-11e9-84be-890f3b56af43?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20190901%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20190901T162234Z&X-Amz-Expires=300&X-Amz-Signature=3344f2dcc2a5f06990fbf79137b80686ca2847d86467219e419baa69d4ed33c7&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dconfig.json&response-content-type=application%2Foctet-stream [following]\n", + "--2019-09-01 16:22:34-- https://github-production-release-asset-2e65be.s3.amazonaws.com/165645094/96b5db80-aa35-11e9-84be-890f3b56af43?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20190901%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20190901T162234Z&X-Amz-Expires=300&X-Amz-Signature=3344f2dcc2a5f06990fbf79137b80686ca2847d86467219e419baa69d4ed33c7&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dconfig.json&response-content-type=application%2Foctet-stream\n", + "Resolving github-production-release-asset-2e65be.s3.amazonaws.com (github-production-release-asset-2e65be.s3.amazonaws.com)... 52.216.130.115\n", + "Connecting to github-production-release-asset-2e65be.s3.amazonaws.com (github-production-release-asset-2e65be.s3.amazonaws.com)|52.216.130.115|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 641 [application/octet-stream]\n", + "Saving to: ‘config.json’\n", + "\n", + "config.json 100%[===================>] 641 --.-KB/s in 0s \n", + "\n", + "2019-09-01 16:22:35 (43.7 MB/s) - ‘config.json’ saved [641/641]\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-25T14:21:08.091797Z", + "start_time": "2019-06-25T14:21:03.027877Z" + }, + "id": "umJkmO9HFf3L", + "colab_type": "code", + "outputId": "bd5330f0-3027-4316-dcad-6937235a3911", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 133 + } + }, + "source": [ + "import os\n", + "import torch\n", + "from sklearn.externals import joblib\n", + "from cdqa.reader.reader_sklearn import Reader\n", + "\n", + "reader = Reader(model_type='xlnet',\n", + " model_name_or_path='xlnet-base-cased',\n", + " fp16=False,\n", + " output_dir='.',\n", + " no_cuda=False,\n", + " pretrained_model_path='.')" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/externals/joblib/__init__.py:15: DeprecationWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. Please import this functionality directly from joblib, which can be installed with: pip install joblib. 
If this warning is raised when loading pickled models, you may need to re-serialize those models with scikit-learn 0.21+.\n", + " warnings.warn(msg, category=DeprecationWarning)\n", + "100%|██████████| 641/641 [00:00<00:00, 319039.86B/s]\n", + "100%|██████████| 798011/798011 [00:01<00:00, 720164.52B/s]\n", + "100%|██████████| 467042463/467042463 [00:38<00:00, 12212631.71B/s]\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "AViocaq-gnQk", + "colab_type": "code", + "outputId": "1a26fc7b-e900-42fc-bfca-2118a2cb880f", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + } + }, + "source": [ + "# evaluate the model\n", + "reader.evaluate(X='dev-v2.0.json')" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Evaluating: 7%|▋ | 103/1569 [01:25<20:25, 1.20it/s]" + ], + "name": "stderr" + } + ] + } + ] +} \ No newline at end of file
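The tutorial notebook above ends on the evaluation run; the joblib import in its Reader cell is only needed if the reader is persisted afterwards, e.g. (a sketch, not a cell from the original notebook):

    from sklearn.externals import joblib

    # assumes `reader` is the Reader instantiated in the notebook cells above
    joblib.dump(reader, 'reader_xlnet.joblib')
    reader = joblib.load('reader_xlnet.joblib')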