diff --git a/README.md b/README.md
index 16283bc..2ed780e 100644
--- a/README.md
+++ b/README.md
@@ -27,4 +27,11 @@ If our work is useful for your own, you can cite us with the following BibTex en
     title = {DetectGPT: Zero-Shot Machine-Generated Text Detection using Probability Curvature},
     publisher = {arXiv},
     year = {2023},
-    }
\ No newline at end of file
+    }
+    @article{meister2022typical,
+    url = {https://arxiv.org/abs/2202.00666},
+    author = {Meister, Clara and Pimentel, Tiago and Wiher, Gian and Cotterell, Ryan},
+    title = {Locally Typical Sampling},
+    publisher = {arXiv},
+    year = {2022}
+    }
diff --git a/paper_scripts/typical.sh b/paper_scripts/typical.sh
new file mode 100644
index 0000000..f54124c
--- /dev/null
+++ b/paper_scripts/typical.sh
@@ -0,0 +1,5 @@
+python run.py --output_name main_typical_p --base_model_name gpt2-xl --mask_filling_model_name t5-3b --n_perturbation_list 1,10,100 --n_samples 500 --pct_words_masked 0.3 --span_length 2 --dataset writing --do_typical_p --typical_p 0.2
+python run.py --output_name main_typical_p --base_model_name EleutherAI/gpt-neo-2.7B --mask_filling_model_name t5-3b --n_perturbation_list 1,10,100 --n_samples 500 --pct_words_masked 0.3 --span_length 2 --dataset writing --do_typical_p --typical_p 0.2
+python run.py --output_name main_typical_p --base_model_name EleutherAI/gpt-j-6B --mask_filling_model_name t5-3b --n_perturbation_list 1,10,100 --n_samples 500 --pct_words_masked 0.3 --span_length 2 --dataset writing --do_typical_p --typical_p 0.2
+python run.py --output_name main_typical_p --base_model_name facebook/opt-2.7b --mask_filling_model_name t5-3b --n_perturbation_list 1,10,100 --n_samples 500 --pct_words_masked 0.3 --span_length 2 --dataset writing --do_typical_p --typical_p 0.2
+python run.py --output_name main_typical_p --batch_size 20 --base_model_name EleutherAI/gpt-neox-20b --mask_filling_model_name t5-11b --n_perturbation_list 1,10,100 --n_samples 500 --pct_words_masked 0.3 --span_length 2 --dataset writing --do_typical_p --typical_p 0.2
diff --git a/run.py b/run.py
index cc50274..7dbdaa3 100644
--- a/run.py
+++ b/run.py
@@ -236,6 +236,8 @@ def sample_from_model(texts, min_words=55, prompt_tokens=30):
         sampling_kwargs['top_p'] = args.top_p
     elif args.do_top_k:
         sampling_kwargs['top_k'] = args.top_k
+    elif args.do_typical_p:
+        sampling_kwargs['typical_p'] = args.typical_p
     min_length = 50 if args.dataset in ['pubmed'] else 150
     outputs = base_model.generate(**all_encoded, min_length=min_length, max_length=200, do_sample=True, **sampling_kwargs, pad_token_id=base_tokenizer.eos_token_id, eos_token_id=base_tokenizer.eos_token_id)
     decoded = base_tokenizer.batch_decode(outputs, skip_special_tokens=True)
@@ -766,6 +768,8 @@ def eval_supervised(data, model):
     parser.add_argument('--top_k', type=int, default=40)
     parser.add_argument('--do_top_p', action='store_true')
     parser.add_argument('--top_p', type=float, default=0.96)
+    parser.add_argument('--do_typical_p', action='store_true')
+    parser.add_argument('--typical_p', type=float, default=0.96)
     parser.add_argument('--output_name', type=str, default="")
     parser.add_argument('--openai_model', type=str, default=None)
     parser.add_argument('--openai_key', type=str)