inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
outputs = model(**inputs)
def mean_pooling(token_embeddings, mask): token_embeddings = token_embeddings.masked_fill(~mask[..., None].bool(), 0.) sentence_embeddings = token_embeddings.sum(dim=1) / mask.sum(dim=1)[..., None] return sentence_embeddings embeddings = mean_pooling(outputs[0], inputs['attention_mask'])