-
Notifications
You must be signed in to change notification settings - Fork 24
Expand file tree
/
Copy pathinstructions.py
More file actions
2287 lines (1804 loc) · 76.8 KB
/
instructions.py
File metadata and controls
2287 lines (1804 loc) · 76.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright 2025 Allen Institute for AI.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Library of instructions."""
import logging
import os
import random
import re
import string
from pathlib import Path
from typing import Dict, Optional, Sequence, Union
# Set NLTK data path to local directory before importing nltk
_nltk_data_dir = Path(__file__).parent / ".nltk_data"
_nltk_data_dir.mkdir(exist_ok=True)
os.environ.setdefault("NLTK_DATA", str(_nltk_data_dir))
import nltk
nltk.data.path.insert(0, str(_nltk_data_dir))
import emoji
import syllapy
import unicodedata
from collections import Counter
import csv
import io
import instructions_util
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Type alias for instruction keyword arguments: either None or a mapping from
# argument name to an int, a str, or a sequence of strings.
# NOTE(review): not referenced in the visible part of this file - confirm usage.
_InstructionArgsDtype = Optional[Dict[str, Union[int, str, Sequence[str]]]]
# The number of keywords.
_NUM_KEYWORDS = 2
# Inclusive bounds used when a word-count target is sampled at random
# (see WordCountRangeChecker and UniqueWordCountChecker).
_NUM_WORDS_LOWER_LIMIT = 100
_NUM_WORDS_UPPER_LIMIT = 500
# Upper bound when sampling how many numbers a response must contain
# (see NumbersCountChecker).
_NUM_NUMBERS = 6
# Upper bound when sampling the period length for periodic words
# (see NthWordJapaneseChecker).
_NUM_WORD_CYCLE = 30
# Maximum number of times a word can be repeated.
_MAX_REPEATS = 5
# Which sentence must contain a keyword.
_NUM_KEYWORD_SENTENCE = 20
# Minimum number of pronouns.
_NUM_PRONOUNS = 25
# The size of increment for lengths.
_NUM_INCREMENT = 5
# Upper bound when sampling the required number of distinct coordinating
# conjunctions (see ConjunctionCountChecker).
_NUM_CONJUNCTIONS = 6
class Instruction:
    """Abstract template shared by every instruction checker.

    Concrete checkers override the four hook methods below; the base
    versions only raise ``NotImplementedError``.
    """

    def __init__(self, instruction_id):
        """Store the identifier this instruction was registered under."""
        self.id = instruction_id

    def build_description(self, **kwargs):
        """Build and return the instruction text (must be overridden)."""
        raise NotImplementedError("`build_description` not implemented.")

    def get_instruction_args(self):
        """Return the keyword args used by `build_description` (must be overridden)."""
        raise NotImplementedError("`get_instruction_args` not implemented.")

    def get_instruction_args_keys(self):
        """Return the names of the `build_description` args (must be overridden)."""
        raise NotImplementedError("`get_instruction_args_keys` not implemented.")

    def check_following(self, value):
        """Decide whether `value` follows the instruction (must be overridden)."""
        raise NotImplementedError("`check_following` not implemented.")
# Everything as follows is part of OOD IFEval
class WordCountRangeChecker(Instruction):
    """Word Count Range: the response must contain between X and Y words."""

    def build_description(self, *, min_words=None, max_words=None):
        """Build the instruction description.

        Args:
          min_words: Minimum number of words allowed in the response. When it
            is None or negative, a value is drawn at random between
            `_NUM_WORDS_LOWER_LIMIT` and `_NUM_WORDS_UPPER_LIMIT`.
          max_words: Maximum number of words allowed in the response. When it
            is None or negative, it is set 5%-10% above the minimum.

        Returns:
          A string representing the instruction description.
        """
        min_is_missing = min_words is None or min_words < 0
        self._min_words = (
            random.randint(_NUM_WORDS_LOWER_LIMIT, _NUM_WORDS_UPPER_LIMIT)
            if min_is_missing
            else min_words
        )

        max_is_missing = max_words is None or max_words < 0
        if max_is_missing:
            # Keep the range small: add between 5% and 10% of the minimum.
            smallest_gap = int(self._min_words * 0.05)
            largest_gap = int(self._min_words * 0.1)
            self._max_words = self._min_words + random.randint(smallest_gap, largest_gap)
        else:
            self._max_words = max_words

        self._description_pattern = "The response must contain between {min_words} and {max_words} words."
        return self._description_pattern.format(
            min_words=self._min_words, max_words=self._max_words
        )

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"min_words": self._min_words, "max_words": self._max_words}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["min_words", "max_words"]

    def check_following(self, value):
        """Return True when the word count of `value` lies in the inclusive range."""
        word_total = instructions_util.count_words(value)
        lower, upper = self._min_words, self._max_words
        return lower <= word_total <= upper
class UniqueWordCountChecker(Instruction):
    """Unique Word Count: the response must contain at least N unique words."""

    def build_description(self, *, N=None):
        """Build the instruction description.

        Args:
          N: An integer specifying the minimum number of unique words required
            in the response. When it is None or negative, a value is drawn at
            random between `_NUM_WORDS_LOWER_LIMIT` and `_NUM_WORDS_UPPER_LIMIT`.

        Returns:
          A string representing the instruction description.
        """
        self._num_unique_words = N
        if self._num_unique_words is None or self._num_unique_words < 0:
            self._num_unique_words = random.randint(
                _NUM_WORDS_LOWER_LIMIT, _NUM_WORDS_UPPER_LIMIT
            )
        self._description_pattern = "Use at least {N} unique words in the response."
        return self._description_pattern.format(N=self._num_unique_words)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"N": self._num_unique_words}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["N"]

    def check_following(self, value):
        """Checks if the response contains at least the expected number of unique words.

        Words are whitespace-separated tokens, lower-cased, with leading and
        trailing ASCII punctuation removed.
        """
        strip_chars = string.punctuation + " "
        unique_words = {token.strip(strip_chars) for token in value.lower().split()}
        # A token made only of punctuation (e.g. "--") strips down to the empty
        # string; it is not a word and must not count towards the total.
        unique_words.discard("")
        return len(unique_words) >= self._num_unique_words
class StopWordPercentageChecker(Instruction):
    """Stop words may make up at most {percentage}% of the words in the response."""

    def build_description(self, *, percentage=None):
        """Build the instruction description.

        Args:
          percentage: An integer giving the largest share of stop words (in
            percent) allowed in the response. When it is None or negative, a
            value between 1 and 100 is drawn at random.

        Returns:
          A string representing the instruction description.
        """
        if percentage is None or percentage < 0:
            percentage = random.randint(1, 100)
        self._percentage = percentage
        self._description_pattern = "Ensure that stop words constitute no more than {percentage}% of the total words in your response."
        return self._description_pattern.format(percentage=self._percentage)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"percentage": self._percentage}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["percentage"]

    def check_following(self, value):
        """Return True when the stop-word share of `value` is within the limit.

        A response without any words is rejected.
        """
        total_words = instructions_util.count_words(value)
        if total_words == 0:
            return False
        stopword_total = instructions_util.count_stopwords(value)
        share_in_percent = (stopword_total / total_words) * 100
        return share_in_percent <= self._percentage
class SentTypeRatioChecker(Instruction):
    """Declarative and interrogative sentences must appear in a 2:1 ratio."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Maintain a 2:1 ratio of declarative to interrogative sentences."
        return self._description_pattern

    def get_instruction_args(self):
        """This instruction takes no arguments."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Return True when there are exactly twice as many '.' sentences as '?' sentences.

        A sentence is classified purely by its final character.
        """
        sentences = instructions_util.split_into_sentences(value)
        statements = len([s for s in sentences if s.endswith('.')])
        questions = len([s for s in sentences if s.endswith('?')])
        return statements == 2 * questions
class SentBalanceChecker(Instruction):
    """Declarative, interrogative and exclamatory sentences must be equally frequent."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Ensure that the ratio of sentence types (declarative, interrogative, exclamatory) is balanced."
        return self._description_pattern

    def get_instruction_args(self):
        """This instruction takes no arguments."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Return True when '.', '?' and '!' each end the same number of sentences."""
        sentences = instructions_util.split_into_sentences(value)
        # Tally sentences by their final character; `s[-1:]` is '' for an
        # empty sentence, which simply never matches a terminator below.
        endings = Counter(sentence[-1:] for sentence in sentences)
        return endings['.'] == endings['?'] == endings['!']
class ConjunctionCountChecker(Instruction):
    """Use at least {small_n} different coordinating conjunctions in the response."""

    # The seven English coordinating conjunctions, lower-cased.
    _COORDINATING_CONJUNCTIONS = frozenset(
        ('and', 'but', 'for', 'nor', 'or', 'so', 'yet')
    )

    def build_description(self, *, small_n=None):
        """Build the instruction description.

        Args:
          small_n: An integer specifying the number of different coordinating
            conjunctions required in the response. When it is None or negative,
            a value between 2 and `_NUM_CONJUNCTIONS` is drawn at random.

        Returns:
          A string representing the instruction description.
        """
        self._num_conjunctions = small_n
        if self._num_conjunctions is None or self._num_conjunctions < 0:
            self._num_conjunctions = random.randint(2, _NUM_CONJUNCTIONS)
        self._description_pattern = "Use at least {small_n} different coordinating conjunctions in the response."
        return self._description_pattern.format(small_n=self._num_conjunctions)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"small_n": self._num_conjunctions}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["small_n"]

    def check_following(self, value):
        """Checks if the response contains enough different coordinating conjunctions.

        Tokens are compared after stripping surrounding ASCII punctuation and
        lower-casing, so "And", "and" and "and," all count as the same
        conjunction rather than as three different ones.
        """
        strip_chars = string.punctuation + ' '
        found = set()
        for token in value.split():
            normalized = token.strip(strip_chars).lower()
            if normalized in self._COORDINATING_CONJUNCTIONS:
                # Store the normalized form; storing the raw token would let
                # case/punctuation variants inflate the count.
                found.add(normalized)
        return len(found) >= self._num_conjunctions
class PersonNameCountChecker(Instruction):
    """Mention at least {N} different person names taken from a fixed list of 50 names."""

    def build_description(self, *, N=None):
        """Build the instruction description.

        Args:
          N: An integer specifying the minimum number of unique person names
            required in the response. When it is None or negative, a value
            between 1 and 50 is drawn at random.

        Returns:
          A string representing the instruction description.
        """
        if N is None or N < 0:
            N = random.randint(1, 50)
        self._num_person_names = N
        self._description_pattern = "Mention at least {N} different person names in the response, from this list of person names: Emma, Liam, Sophia, Jackson, Olivia, Noah, Ava, Lucas, Isabella, Mason, Mia, Ethan, Charlotte, Alexander, Amelia, Benjamin, Harper, Leo, Zoe, Daniel, Chloe, Samuel, Lily, Matthew, Grace, Owen, Abigail, Gabriel, Ella, Jacob, Scarlett, Nathan, Victoria, Elijah, Layla, Nicholas, Audrey, David, Hannah, Christopher, Penelope, Thomas, Nora, Andrew, Aria, Joseph, Claire, Ryan, Stella, Jonathan ."
        return self._description_pattern.format(N=self._num_person_names)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"N": self._num_person_names}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["N"]

    def check_following(self, value):
        """Return True when enough distinct names from the list occur in `value`.

        Matching is case-sensitive and requires word boundaries on both sides
        of the name.
        """
        allowed_names = (
            "Emma", "Liam", "Sophia", "Jackson", "Olivia",
            "Noah", "Ava", "Lucas", "Isabella", "Mason",
            "Mia", "Ethan", "Charlotte", "Alexander", "Amelia",
            "Benjamin", "Harper", "Leo", "Zoe", "Daniel",
            "Chloe", "Samuel", "Lily", "Matthew", "Grace",
            "Owen", "Abigail", "Gabriel", "Ella", "Jacob",
            "Scarlett", "Nathan", "Victoria", "Elijah", "Layla",
            "Nicholas", "Audrey", "David", "Hannah", "Christopher",
            "Penelope", "Thomas", "Nora", "Andrew", "Aria",
            "Joseph", "Claire", "Ryan", "Stella", "Jonathan",
        )
        mentioned = {
            candidate
            for candidate in allowed_names
            if re.search(r'\b' + re.escape(candidate) + r'\b', value)
        }
        return len(mentioned) >= self._num_person_names
class NGramOverlapChecker(Instruction):
    """Maintain a trigram overlap of {percentage}% (±2%) with the provided reference text."""

    def build_description(self, *, reference_text=None, percentage=None):
        """Build the instruction description.

        Args:
          reference_text: A string representing the reference text.
          percentage: An integer specifying the percent trigram overlap to
            maintain in the response. When it is None or negative, a value
            between 1 and 100 is drawn at random.

        Returns:
          A string representing the instruction description.
        """
        self._reference_text = reference_text
        if percentage is None or percentage < 0:
            percentage = random.randint(1, 100)
        self._percentage = percentage
        self._description_pattern = "Maintain a trigram overlap of {percentage}% (±2%) with the provided reference text."
        return self._description_pattern.format(percentage=self._percentage)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"reference_text": self._reference_text, "percentage": self._percentage}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["reference_text", "percentage"]

    def check_following(self, value):
        """Return True when the trigram overlap is within 2 points of the target.

        The overlap is the share of the response's distinct trigrams that also
        occur in the reference text. Both texts are handed to `nltk.ngrams`
        as-is, so for plain strings the trigrams are built from whatever
        iterating a string yields, i.e. individual characters.
        """
        size = 3
        response_grams = set(nltk.ngrams(value, size))
        reference_grams = set(nltk.ngrams(self._reference_text, size))
        if not response_grams:
            return False
        shared = response_grams & reference_grams
        overlap = len(shared) / len(response_grams)
        target = self._percentage
        return target - 2 <= overlap * 100 <= target + 2
class NumbersCountChecker(Instruction):
    """Include exactly {N} numbers in the response."""

    def build_description(self, *, N=None):
        """Build the instruction description.

        Args:
          N: An integer specifying the exact number of numbers that must
            appear in the response. When it is None or negative, a value
            between 1 and `_NUM_NUMBERS` is drawn at random.

        Returns:
          A string representing the instruction description.
        """
        if N is None or N < 0:
            N = random.randint(1, _NUM_NUMBERS)
        self._count_numbers = N
        self._description_pattern = "Include exactly {N} numbers in the response."
        return self._description_pattern.format(N=self._count_numbers)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"N": self._count_numbers}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["N"]

    def check_following(self, value):
        """Return True when `value` contains exactly the required number of digit runs."""
        # Dropping punctuation first glues "3.14" and "1,000" into one digit
        # run each, so decimals and thousands separators count as one number.
        drop_punctuation = str.maketrans('', '', string.punctuation)
        digit_runs = re.findall(r'\d+', value.translate(drop_punctuation))
        return len(digit_runs) == self._count_numbers
class AlphabetLoopChecker(Instruction):
    """Each word must start with the next letter of the alphabet, looping back to 'A' after 'Z'."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Each word must start with the next letter of the alphabet, looping back to 'A' after 'Z'."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Return True when successive words start with successive alphabet letters.

        ASCII punctuation is removed first. The sequence may begin at any
        letter; a response with no words, or whose first word does not start
        with an ASCII letter, is rejected.
        """
        drop_punctuation = str.maketrans('', '', string.punctuation)
        # With all punctuation already deleted, a plain whitespace split yields
        # clean, non-empty tokens.
        tokens = value.translate(drop_punctuation).split()
        if not tokens:
            return False

        letters = string.ascii_lowercase
        first_letter = tokens[0][0].lower()
        if first_letter not in letters:  # numbers are fails
            return False

        start = letters.index(first_letter)
        for offset, token in enumerate(tokens[1:], start=1):
            expected = letters[(start + offset) % 26]
            if token.lower()[0] != expected:
                return False
        return True
class SingleVowelParagraphChecker(Instruction):
    """Write a single paragraph whose words use at most three types of vowels."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Write a paragraph using words that contain only three types of vowels."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Return True for a one-line response that uses no more than three distinct vowels."""
        text = value.strip()
        # Any remaining newline means the response has more than one paragraph.
        if '\n' in text:
            return False
        vowels_used = set(text.lower()) & set('aeiou')
        return len(vowels_used) <= 3
class ConsonantClusterChecker(Instruction):
    """Ensure each word in your response has at least one consonant cluster (two or more consonants together)."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Ensure each word in your response has at least one consonant cluster (two or more consonants together)."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Return True when every whitespace-separated token has two adjacent consonants.

        Consonants are the ASCII letters other than a, e, i, o, u ('y' counts
        as a consonant); matching is case-insensitive via lower-casing.
        """
        two_consonants = re.compile('[bcdfghjklmnpqrstvwxyz]{2}')
        tokens = value.lower().strip().split()
        return all(two_consonants.search(token) is not None for token in tokens)
class IncrementingAlliterationChecker(Instruction):
    """Each sentence must have a longer sequence of consecutive alliterative words than the previous one."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Each sentence must have a longer sequence of consecutive alliterative words than the previous one."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Return True when each sentence scores strictly higher than the one before.

        A sentence's score is the total number of words that belong to a run
        of adjacent words sharing the same first character (after lower-casing
        and dropping leading ASCII punctuation), summed over all such runs.
        The first sentence only needs a score of at least 0.
        """
        leading_junk = string.punctuation + ' '
        previous_score = -1
        for sentence in instructions_util.split_into_sentences(value):
            initials = []
            for token in sentence.lower().split():
                trimmed = token.lstrip(leading_junk)
                if trimmed:
                    initials.append(trimmed[0])

            score = 0
            inside_run = False
            for left, right in zip(initials, initials[1:]):
                if left != right:
                    inside_run = False
                elif inside_run:
                    # Extending an existing run adds one more word.
                    score += 1
                else:
                    # A new run starts with two words at once.
                    score += 2
                    inside_run = True

            if score <= previous_score:
                return False
            previous_score = score
        return True
class PalindromeChecker(Instruction):
    """Include at least 10 single-word palindromes, each at least 5 characters long."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Include at least 10 single-word palindromes, each at least 5 characters long."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Return True when at least 10 tokens are palindromes of length 5 or more.

        ASCII punctuation is removed and the text is lower-cased first.
        Repeated occurrences of the same palindrome are each counted.
        """
        drop_punctuation = str.maketrans('', '', string.punctuation)
        hits = 0
        for token in value.translate(drop_punctuation).lower().split():
            if len(token) >= 5 and token == token[::-1]:
                hits += 1
        return hits >= 10
class PunctuationCoverChecker(Instruction):
    """Use every standard punctuation mark at least once, including semicolons, colons, and the interrobang (?!)."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Use every standard punctuation mark at least once, including semicolons, colons, and the interrobang (?!)."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Return True when `value` has an interrobang plus each of . , ! ? ; :

        One two-character interrobang ('?!' preferred, else '!?') is removed
        before the individual marks are looked for, so its '?' and '!' do not
        also satisfy the stand-alone marks.
        """
        has_interrobang = any(form in value for form in ('!?', '?!', '‽'))
        if not has_interrobang:
            return False

        remainder = value.replace('?!', '', 1)
        if len(remainder) == len(value):
            # No '?!' was present; try the reversed spelling instead.
            remainder = value.replace('!?', '', 1)

        still_missing = {".", ",", "!", "?", ";", ":"}.difference(remainder)
        return not still_missing
class NestedParenthesesChecker(Instruction):
    """Nest parentheses (and [brackets {and braces}]) at least 5 levels deep."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Nest parentheses (and [brackets {and braces}]) at least 5 levels deep."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Checks if the response includes a correctly closed set of at least 5 nested brackets.

        A group only counts once its own closing bracket has been matched, and
        its depth is one more than the deepest correctly closed group inside
        it. Merely opening five brackets and closing the innermost one
        (e.g. "((((()") is therefore not enough. A closing bracket that does
        not match the most recent opener discards everything tracked so far.

        Args:
          value: The response text.

        Returns:
          True if some properly closed bracket group nests at least 5 deep.
        """
        min_levels = 5
        opener_for = {')': '(', ']': '[', '}': '{'}
        open_stack = []    # bracket characters that are still open
        inner_depths = []  # per open bracket: deepest closed group seen inside it
        for char in value:
            if char in "([{":
                open_stack.append(char)
                inner_depths.append(0)
            elif char in opener_for:
                if open_stack and open_stack[-1] == opener_for[char]:
                    open_stack.pop()
                    # Depth of the group that has just been closed correctly.
                    depth = inner_depths.pop() + 1
                    if depth >= min_levels:
                        return True
                    if inner_depths:
                        inner_depths[-1] = max(inner_depths[-1], depth)
                else:
                    # Mismatch — reset
                    open_stack = []
                    inner_depths = []
        return False
class NestedQuotesChecker(Instruction):
    """Include quotes within quotes within quotes, at least 3 levels deep, alternating between double quotes and single quotes."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Include quotes within quotes within quotes, at least 3 levels deep, alternating between double quotes and single quotes."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Return True once three nested quote levels have been opened and closed again.

        A quote character closes the innermost open quote when it is the same
        character; otherwise it opens a new level. Adjacent levels therefore
        always alternate between " and ', starting with either.
        """
        required_levels = 3
        open_quotes = []
        deepest = 0
        for char in value:
            if open_quotes and char == open_quotes[-1]:
                open_quotes.pop()
                # Number of levels closed since the deepest point was reached.
                if deepest - len(open_quotes) >= required_levels:
                    return True
            elif char in ('"', "'"):
                open_quotes.append(char)
                deepest = max(deepest, len(open_quotes))
        return False
class PrimeLengthsChecker(Instruction):
    """Use only words with lengths that are prime numbers."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Use only words with lengths that are prime numbers."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Return True when every word (ASCII punctuation removed) has a prime length.

        Only primes below 100 are recognised, so any longer word fails.
        """
        prime_lengths = frozenset((
            2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41,
            43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
        ))
        drop_punctuation = str.maketrans('', '', string.punctuation)
        tokens = value.translate(drop_punctuation).split()
        return all(len(token) in prime_lengths for token in tokens)
class OptionsResponseChecker(Instruction):
    """Answer with one of the following options: {options}. Do not give any explanation."""

    def build_description(self, *, options=None):
        """Build the instruction description.

        Args:
          options: A string specifying the permitted options for the response,
            separated by "/", the word "or", or ",". When it is None, one of
            the built-in option strings is picked at random.

        Returns:
          A string representing the instruction description.
        """
        # Options string may be: yes/no/maybe, I know or I don't know, a), b), c), d)
        # Can be separated by "/", "or", ","
        options_bank = ["yes/no/maybe", "I know or I don't know", "a), b), c), d)"]
        if options is None:
            options = random.choice(options_bank)

        # Be more strict about format for multiple choice letters than for text options
        self._strict = re.match(r"\W*[aA]\W*[bB]\W*[cC]\W*", options) is not None

        if "/" in options:
            parts = options.split("/")
        elif re.search(r"\bor\b", options):
            # Match "or" only as a whole word: a plain substring test would
            # also fire inside words such as "north" or "more" and split them.
            parts = re.split(r"\bor\b", options)
        else:
            parts = options.split(",")

        self._options = [part.strip() for part in parts]
        self._options_text = options  # in text, shouldn't be formatted as a list
        self._description_pattern = "Answer with one of the following options: {options}. Do not give any explanation."
        return self._description_pattern.format(options=self._options_text)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"options": self._options_text}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["options"]

    def check_following(self, value):
        """Checks if the response is exactly one of {options}.

        In strict mode (multiple-choice letters) the response must equal an
        option character for character. Otherwise the comparison ignores case
        and surrounding ASCII punctuation and spaces.
        """
        if self._strict:
            return value in self._options
        strip_chars = string.punctuation + ' '
        answer = value.strip(strip_chars).lower()
        return any(
            option.strip(strip_chars).lower() == answer for option in self._options
        )
class NewLineWordsChecker(Instruction):
    """Write each word on a new line."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Write each word on a new line."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Checks if the response has each word on a new line.

        ASCII punctuation is removed first. The response passes when the
        number of non-blank lines equals the number of words, i.e. every
        non-blank line holds exactly one word.
        """
        text = value.translate(str.maketrans('', '', string.punctuation))
        # Lines holding only whitespace (a stray space, a '\r' left over from a
        # Windows line ending, a line that was pure punctuation) are blank too
        # and must not be counted as a line that should carry a word.
        word_lines = [line for line in text.split('\n') if line.strip()]
        return len(word_lines) == len(text.split())
class EmojiSentenceChecker(Instruction):
    """Please use an emoji at the end of every sentence."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Please use an emoji at the end of every sentence."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Return True when every sentence is closed by an emoji.

        After removing ASCII punctuation and surrounding whitespace, a
        sentence passes when one of its last two characters is an emoji, or,
        failing that, when the following sentence begins with an emoji.
        """
        drop_punctuation = str.maketrans('', '', string.punctuation)
        sentences = instructions_util.split_into_sentences(value)
        cleaned = [s.translate(drop_punctuation).strip() for s in sentences]
        final_index = len(cleaned) - 1

        for index, text in enumerate(cleaned):
            if not text:
                return False
            # The second-to-last character is examined as well because blank
            # spaces are treated oddly around emoji.
            if any(emoji.is_emoji(char) for char in text[-2:]):
                continue
            # Otherwise the emoji may have been attached to the start of the
            # next sentence.
            if index == final_index:
                return False
            following = cleaned[index + 1]
            if not following or not emoji.is_emoji(following[0]):
                return False
        return True
class CharacterCountUniqueWordsChecker(Instruction):
    """Respond with three sentences, all containing the same number of characters but using all different words."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Respond with three sentences, all containing the same number of characters but using all different words."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Checks if the response has exactly 3 sentences containing the same number of characters but different words.

        Sentence length is measured in characters after trimming surrounding
        whitespace. Words are compared case-insensitively with surrounding
        ASCII punctuation removed, and no word may appear in more than one of
        the three sentences.
        """
        sentences = instructions_util.split_into_sentences(value)
        if len(sentences) != 3:
            return False

        if len({len(sentence.strip()) for sentence in sentences}) != 1:
            return False

        # Enforce the "all different words" half of the instruction: the
        # sentences must not share any word with one another.
        strip_chars = string.punctuation + ' '
        words_seen = set()
        for sentence in sentences:
            words = {token.strip(strip_chars) for token in sentence.lower().split()}
            words.discard('')  # tokens that were pure punctuation
            if words & words_seen:
                return False
            words_seen |= words
        return True
class NthWordJapaneseChecker(Instruction):
    """Every {N}th word of your response must be in Japanese."""

    # Matches any Hiragana, Katakana or common Kanji character.
    _JAPANESE_CHAR_RE = re.compile(r'[\u3040-\u30ff\u4e00-\u9fff]')

    def build_description(self, *, N=None):
        """Build the instruction description.

        Args:
          N: An integer specifying the cycle length for Japanese words to
            appear in the response. When it is None or not positive, a value
            between 1 and `_NUM_WORD_CYCLE` is drawn at random.

        Returns:
          A string representing the instruction description.
        """
        self._japanese_position = N
        # Zero is rejected along with negatives: it would make the modulo in
        # `check_following` divide by zero.
        if self._japanese_position is None or self._japanese_position <= 0:
            self._japanese_position = random.randint(1, _NUM_WORD_CYCLE)

        # Derive the ordinal suffix from the resolved position (not from the
        # raw argument, which may be None). 11, 12 and 13 take "th".
        position = self._japanese_position
        if position % 100 in (11, 12, 13):
            suffix = "th"
        else:
            suffix = {1: "st", 2: "nd", 3: "rd"}.get(position % 10, "th")
        self._description_pattern = (
            "Every {N}" + suffix + " word of your response must be in Japanese."
        )
        return self._description_pattern.format(N=position)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"N": self._japanese_position}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["N"]

    def check_following(self, value):
        """Checks if every {N}th word of the response is in Japanese.

        A word counts as Japanese when it contains at least one Hiragana,
        Katakana or Kanji character. Words at an Nth position that are empty
        after stripping ASCII punctuation, or that consist only of digits,
        are exempt.
        """
        strip_chars = string.punctuation + ' '
        for position, token in enumerate(value.split(), start=1):
            if position % self._japanese_position != 0:
                continue
            word = token.strip(strip_chars)
            if not word or word.isdigit():
                continue
            if not self._JAPANESE_CHAR_RE.search(word):
                return False
        return True
class StartWithVerbChecker(Instruction):
"""The response must start with a verb."""
def build_description(self):
"""Build the instruction description."""
self._description_pattern = "The response must start with a verb."
return self._description_pattern
def get_instruction_args(self):
"""Returns the keyword args of `build_description`."""
return None