import logging

logger = logging.getLogger('stancedatasets')


def format_masked_lm_prompt(stance_df, tokenizer, prompt_type, lang='en', masked_lm_tokens=1):
    """Build (text, prompt) pairs for masked-LM stance prompting.

    Each prompt is a fixed sentence that embeds the row's ``target`` value and
    ends with ``masked_lm_tokens`` copies of ``tokenizer.mask_token`` followed
    by a period.

    Args:
        stance_df: Object indexable by ``'text'`` and ``'target'``
            (presumably a pandas DataFrame -- confirm against callers).
        tokenizer: Object exposing a ``mask_token`` string attribute.
        prompt_type: Template selector; 1-4 are defined for ``lang='pl'``.
        lang: Language code. Only ``'pl'`` has templates in this function, so
            the default ``'en'`` currently ends in ``ValueError``.
        masked_lm_tokens: Number of mask tokens placed in the prompt.

    Returns:
        A tuple ``(texts, prompts)`` of two lists built from
        ``stance_df['text']`` and from the formatted ``stance_df['target']``.

    Raises:
        ValueError: If no template matches ``prompt_type`` and ``lang``.
    """
    # Built up-front so a tokenizer problem surfaces before template lookup.
    mask_slots = "".join([tokenizer.mask_token] * masked_lm_tokens)

    # (prompt_type, opening phrase, phrase joining the target to the masks)
    pl_templates = (
        (1, 'Moja postawa w kierunku ', ' jest: '),
        (2, 'Moja postawa w kierunku ', ' jest '),
        (3, 'Więc moja postawa w kierunku ', ' jest: '),
        (4, 'Więc moja postawa w kierunku ', ' jest '),
    )

    if lang == 'pl':
        for template_id, opening, linker in pl_templates:
            if prompt_type == template_id:
                texts = list(stance_df['text'])
                prompts = list(opening + stance_df['target'] + linker + mask_slots + '.')
                return texts, prompts

    raise ValueError(f'unknown prompt_type: {prompt_type} for language {lang}')