-- Invoke the mldb.ONNXEmbeddings table operator on the review rows.
-- Inputs, in the order they are passed:
--   1. review rows: rev_id cast to VARCHAR(21), and rev_text exposed as txt
--   2. the model_id/model row for 'bge-small-en-v1.5' from onnx_models (DIMENSION input)
--   3. the tokenizer for the same model_id from embeddings_tokenizers (DIMENSION input)
SELECT
    *
FROM mldb.ONNXEmbeddings(
    ON (
        SELECT
            CAST(rev_id AS VARCHAR(21)) AS rev_id,
            rev_text AS txt
        FROM amazon_reviews_25
    )
    ON (
        SELECT
            model_id,
            model
        FROM onnx_models
        WHERE model_id = 'bge-small-en-v1.5'
    ) DIMENSION
    ON (
        SELECT
            tokenizer
        FROM embeddings_tokenizers
        WHERE model_id = 'bge-small-en-v1.5'
    ) DIMENSION
    USING
        -- rev_id is listed here and appears alongside the embedding in the result.
        Accumulate('rev_id')
        -- NOTE(review): presumably caps the tokenized input length at 10 tokens;
        -- confirm against the ONNXEmbeddings documentation.
        EncodeMaxLength(10)
        -- Name of the model output requested; it is also the result column name.
        ModelOutputTensor('sentence_embedding')
) AS emb
;
*** Query completed. 25 rows found. 2 columns returned.
*** Total elapsed time was 1 second.
The following is a snippet of the full output:
rev_id         sentence_embedding
-------------- -----------------------------------------------------------------------------------
A3V1MKC2BVWY48 311CA6BC7C3ED23CEB8C31BB0E020E3D119B9E3CAFB0EC3B045A863D6D9830BD58CD603D99DABDBCA86A8DBC915F013DD3FA04BCA1A4F83C8B2ACCBCCE9FA4BC94AD1EBC6B22D5BCDC66CBBDB
A26GKZPS079GFF 460A94BDC5F077BD4EF2CA3C37C856BDCB71A93C962057BA5E57803DA849F73CD96A8E3DBD53DBBBCF7EA13CB203D6BBECC5EABC3FA6373D5C67E43C9EEBB63C66A2713D1FA1CABCAF7517BD7
AUTNO7VDY4H4A  817A343CBC90533C3443F5BC2181213CBF8B16BC16ECC83A9134B43D1B41BB3CE27DDD3CF0BD43BD79DCC03C0C9A2C3D9224AEBCA705BF3CB7A223BCA15F21BC1EA41C3DDDA5853C4BC3E5BD3
A2S166WSCFIFP5 96DFF4BC51600BBDF8FE3F3DE68042BC0CB51CBB92C72B3D1A1C233D5A6DA9BBCAD3E93CCCFAF2BC555ABCBCAADB2BBCD5FE20BCACCC913C7BDE3CBC2E8508BCA1E1B53C4E52A23CD92FB7BD2
A5E9TSD20U9PR  D65FB7BCB9BD48BDB4DE6B3D57ABAA3C5F65083D1C68C9BC1F869D3D8BAD2EBC1B681ABD9A7B7CBD2B3A5CBC0DE18BBDD61FFC3BEF601B3D0DBF91BB9B781EBD5E9B59BBCDDCF93CCEB0CFBC3
...