.set width 300
-- Ask ONNXEmbeddings for the properties of the bge-small-en-v1.5 model.
-- The recorded run below returned a single ModelProperties column that
-- lists the model input and output tensors.
SELECT TOP 1
    *
FROM mldb.ONNXEmbeddings(
    -- Review rows, with the review text exposed under the name txt
    ON (
        SELECT
            rev_id,
            rev_text AS txt
        FROM amazon_reviews_25
    )
    -- ONNX model row, supplied as a DIMENSION input
    ON (
        SELECT
            model_id,
            model
        FROM onnx_models
        WHERE model_id = 'bge-small-en-v1.5'
    ) DIMENSION
    -- Tokenizer row for the same model_id, supplied as a DIMENSION input
    ON (
        SELECT tokenizer
        FROM embeddings_tokenizers
        WHERE model_id = 'bge-small-en-v1.5'
    ) DIMENSION
    USING
        Accumulate('rev_id', 'txt')
        -- sentence_embedding is one of the output tensors listed in the result
        ModelOutputTensor('sentence_embedding')
        ShowModelProperties('True')
) AS td
;
*** Query completed. One row found. One column returned.
*** Total elapsed time was 1 second.
ModelProperties
------------------------------------------------------------------------------------------------------------------------------
Input(s): {name: input_ids, tensor: INT64[batch_size, sequence_length]}, {name: attention_mask, tensor: INT64[batch_size, sequence_length]} Output(s): {name: token_embeddings, tensor: FLOAT32[batch_size, sequence_length, 384]}, {name: sentence_embedding, tensor: FLOAT32[batch_size, 384]}