# chatglm-6b-int4 / demo_api.py
# Load the INT4-quantized ChatGLM-6B checkpoint. The repo ships its own
# modeling code, so trust_remote_code=True is required, and the generic
# AutoModel class (rather than a task-specific head) is the right loader.
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("NewBreaker/chatglm-6b-int4", trust_remote_code=True)
model = AutoModel.from_pretrained("NewBreaker/chatglm-6b-int4", trust_remote_code=True)
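# Minimal usage sketch: ChatGLM's bundled remote code is assumed to expose
# the same chat() helper as upstream THUDM/chatglm-6b; adjust device
# placement (e.g. model.half().cuda() on GPU) as needed.
model = model.eval()
response, history = model.chat(tokenizer, "Hello, who are you?", history=[])
print(response)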
# Optional: query the Hugging Face Inference API for the account's pinned
# models (requires API_TOKEN to hold a valid Hugging Face access token).
# import requests
# api_url = "https://api-inference.huggingface.co/usage/pinned_models"
# headers = {"Authorization": f"Bearer {API_TOKEN}"}
# response = requests.get(api_url, headers=headers)
# # Example response: {"pinned_models": [...], "allowed_pinned_models": 5}