?

昇騰服務(wù)器將BAAI/bge-large-zh-v1.5向量模型（bge模型）轉(zhuǎn)onnx轉(zhuǎn)om并部署

一、下載模型

使用huggingface-cli命令行下載

mkdir -p BAAI && ls BAAI|xargs huggingface-cli download BAAI/bge-large-zh-v1.5 --resume-download --local-dir-use-symlinks False --local-dir BAAI --exclude

二、轉(zhuǎn)onnx

from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np
import os

# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained('BAAI')
model = AutoModel.from_pretrained('BAAI')
model.eval()


def generate_random_data(shape, dtype, low=0, high=2):
    if dtype in ["float32", "float16"]:
        return np.random.random(shape).astype(dtype)
    elif dtype in ["int32", "int64"]:
        return np.random.uniform(low, high, shape).astype(dtype)
    else:
        raise NotImplementedError("Not supported format: {}".format(dtype))

input_data = (
    torch.Tensor(generate_random_data([1, 256], "int64")).to(torch.int64),
    torch.Tensor(generate_random_data([1, 256], "int64")).to(torch.int64),
    torch.Tensor(generate_random_data([1, 256], "int64")).to(torch.int64)
)

input_names = ["input_ids", "attention_mask", "token_type_ids"]
output_names = ["out"]
torch.onnx.export(
    model,
    input_data,
    os.path.join("BAAI", "bge-small-zh-v1.5.onnx"),
    input_names=input_names,
    output_names=output_names,
    opset_version=11,
)

三、onnx文件轉(zhuǎn)om文件

atc --model=./bge-small-zh-v1.5.onnx --framework=5 --output=./bge-small-zh-v1.5 --input_format=ND --log=debug --input_shape "input_ids:1,256;token_type_ids:1,256;attention_mask:1,256" --out_nodes "out" --soc_version=Ascend310P3

注意：

--soc_version的310P3是你npu的型號(hào)，使用npu-smi info查看

四、運(yùn)行om文件獲取文本向量

運(yùn)行代碼需要第五步安裝完依賴(lài)才可以運(yùn)行

import numpy as np
import os
import sys
from transformers import (
    # BertAdapterModel,
    AutoConfig,
    AutoTokenizer)

# path = os.path.dirname(os.path.abspath(__file__))
# sys.path.append(os.path.join(path, "../embed/acllite"))

from ACLLite.python.acllite_resource import AclLiteResource
from ACLLite.python.acllite_model import AclLiteModel

# om文件路徑
model_path = 'bge-small-zh-v1.5.om'


def postprocess(output, attention_mask=None):
    sentence_embeddings_np = output[0][:, 0]
    sentence_embeddings_np = sentence_embeddings_np / np.linalg.norm(sentence_embeddings_np, ord=2, axis=1,
                                                                     keepdims=True)
    return sentence_embeddings_np


def preprocess(text, tokenizer):
    # inputs = tokenizer(text, padding='max_length', max_length=512, return_tensors='pt')
    inputs = tokenizer(text, padding='max_length', max_length=256, truncation=True, return_tensors='pt')

    # inputs = tokenizer(text, padding=True, truncation=True, return_tensors='pt')

    input_ids = inputs['input_ids'].detach().cpu().numpy()
    attention_mask = inputs['attention_mask'].detach().cpu().numpy()
    token_type_ids = inputs['token_type_ids'].detach().cpu().numpy()
    inputs = [input_ids, attention_mask, token_type_ids]
    return inputs, attention_mask


acl_resource = AclLiteResource()
acl_resource.init()
model = AclLiteModel(model_path)
# 模型文件路徑
tokenizer = AutoTokenizer.from_pretrained('bge-large-zh-v1.5')


def query(input):
    inputs, attention_mask = preprocess(input.query, tokenizer)
    outputs = model.execute(inputs)
    outputs = postprocess(outputs, attention_mask)
    return {"vecters": outputs.tolist()}


def cosine_similarity(embedding1, embedding2):
    # 計(jì)算兩個(gè)向量的內(nèi)積
    dot_product = np.dot(embedding1, embedding2)

    # 計(jì)算兩個(gè)向量的模長(zhǎng)
    norm1 = np.linalg.norm(embedding1)
    norm2 = np.linalg.norm(embedding2)

    # 計(jì)算余弦相似度
    similarity = dot_product / (norm1 * norm2)
    return similarity


if __name__ == '__main__':
    text_1 = "我愛(ài)天安門(mén)"

    inputs_1, attention_mask = preprocess(text_1, tokenizer)

    result_list_1 = model.execute(inputs_1)

    embedding1 = postprocess(result_list_1)[0]
    # 輸出向量
    print("embedding1",embedding1)
    # 余弦相似度
    # print(cosine_similarity(embedding1, embedding2))

接口形式：

import numpy as np
import os
from fastapi import FastAPI, Depends
from pydantic import BaseModel
from typing import List
from transformers import AutoTokenizer
from ACLLite.python.acllite_resource import AclLiteResource
from ACLLite.python.acllite_model import AclLiteModel

# om文件路徑
model_path = 'bge-small-zh-v1.5.om'
# 模型文件路徑
model_name = 'bge-large-zh-v1.5'

# 全局變量
acl_resource = None
model = None

app = FastAPI()

tokenizer_ = AutoTokenizer.from_pretrained(model_name)

class InputItem(BaseModel):
    input: List[str]
    model: str

class EmbeddingResponse(BaseModel):
    object: str
    embedding: List[float]

class EmbeddingsResponse(BaseModel):
    data: List[EmbeddingResponse]

def postprocess(output, attention_mask=None):
    sentence_embeddings_np = output[0][:, 0]
    sentence_embeddings_np = sentence_embeddings_np / np.linalg.norm(sentence_embeddings_np, ord=2, axis=1, keepdims=True)
    return sentence_embeddings_np

def preprocess(texts, tokenizer):
    inputs = tokenizer(texts, padding='max_length', max_length=256, truncation=True, return_tensors='pt')
    input_ids = inputs['input_ids'].detach().cpu().numpy()
    attention_mask = inputs['attention_mask'].detach().cpu().numpy()
    token_type_ids = inputs['token_type_ids'].detach().cpu().numpy()
    return input_ids, attention_mask, token_type_ids

@app.post("/v1/embeddings", response_model=EmbeddingsResponse)
async def get_embeddings(input_data: InputItem):
    texts = input_data.input
    model_n = input_data.model

    input_ids, attention_mask, token_type_ids = preprocess(texts, tokenizer_)

    outputs = model.execute([input_ids, attention_mask, token_type_ids])
    embeddings = postprocess(outputs, attention_mask)

    response_data = []
    for embedding in embeddings:
        response_data.append({
            "object": "embedding",
            "embedding": embedding.tolist(),
        })

    return {"data": response_data}

@app.on_event("startup")
def startup_event():
    global acl_resource, model
    acl_resource = AclLiteResource()
    acl_resource.init()
    model = AclLiteModel(model_path)

def start_server():
    import uvicorn
    # om_baai_main是你腳本名字
    uvicorn.run("om_baai_main:app", host=os.environ.get('EMBEDDING_HOST', '0.0.0.0'),
                port=int(os.environ.get('EMBEDDING_PORT', 8000)), workers=int(os.environ.get('EMBEDDING_WORKERS', 1)))

if __name__ == "__main__":
    start_server()

接口調(diào)用方式：

curl --location --request POST 'http://192.168.21.30:8000/v1/embeddings' \
--header 'Content-Type: application/json' \
--data-raw '{
    "input": [
        "八一建軍節(jié)前夕，中共中央政治局就推進(jìn)現(xiàn)代邊海空防建設(shè)進(jìn)行第十六次集體學(xué)習(xí)。"
    ],
    "model": "bge-large-zh-v1.5" # 隨便寫(xiě)，沒(méi)做校驗(yàn)
}'

五、安裝ACLLite依賴(lài)

from ACLLite.python.acllite_resource import AclLiteResource
from ACLLite.python.acllite_model import AclLiteModel

需要安裝ACLLite，地址是：https://gitee.com/ascend/ACLLite

按照源碼編譯安裝

此過(guò)程可能提示安裝git和sudo：

# 安裝git
apt-get update
apt-get install -y git

# 安裝sudo
apt-get install sudo

安裝以后運(yùn)行代碼，會(huì)提示ACLLite項(xiàng)目里找不到包，實(shí)際是導(dǎo)包路徑問(wèn)題，進(jìn)入項(xiàng)目找到對(duì)應(yīng)文件改一下路徑就可以

比如：

將from acllite_logger import log_info改成from ACLLite.python.acllite_logger import log_info

除此以外還要安裝一個(gè)Pillow：pip install Pillow==9.5.0

posted @ 2024-08-01 18:36 A肆閱讀(13) 評(píng)論(0) 收藏舉報(bào)

刷新頁(yè)面返回頂部

? ?

Acheng1011

昇騰服務(wù)器將BAAI/bge-large-zh-v1.5向量模型（bge模型）轉(zhuǎn)onnx轉(zhuǎn)om并部署

公告