Instructions for using LLM360/CrystalChat-7B-Web2Code with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- Transformers
How to use LLM360/CrystalChat-7B-Web2Code with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="LLM360/CrystalChat-7B-Web2Code", trust_remote_code=True)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("LLM360/CrystalChat-7B-Web2Code", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use LLM360/CrystalChat-7B-Web2Code with vLLM:
Install from pip and serve the model:
```shell
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "LLM360/CrystalChat-7B-Web2Code"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "LLM360/CrystalChat-7B-Web2Code",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
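Because the vLLM server exposes an OpenAI-compatible API, it can also be queried from Python. A minimal sketch, assuming the server above is running on localhost:8000 and the `openai` package is installed (`pip install openai`); the prompt and sampling values simply mirror the curl example:

```python
from openai import OpenAI

# vLLM does not require a real API key unless one was configured at launch.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

completion = client.completions.create(
    model="LLM360/CrystalChat-7B-Web2Code",
    prompt="Once upon a time,",
    max_tokens=512,
    temperature=0.5,
)
print(completion.choices[0].text)
```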
- SGLang
How to use LLM360/CrystalChat-7B-Web2Code with SGLang:
Install from pip and serve the model:
```shell
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "LLM360/CrystalChat-7B-Web2Code" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "LLM360/CrystalChat-7B-Web2Code",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```shell
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
  --model-path "LLM360/CrystalChat-7B-Web2Code" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "LLM360/CrystalChat-7B-Web2Code",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use LLM360/CrystalChat-7B-Web2Code with Docker Model Runner:
```shell
docker model run hf.co/LLM360/CrystalChat-7B-Web2Code
```
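For a quick local sanity check with the Transformers pipeline from the snippet near the top, the loaded `pipe` can be called directly. A minimal sketch; the prompt and generation settings below are arbitrary choices, not values taken from the model card:

```python
from transformers import pipeline

pipe = pipeline("text-generation", model="LLM360/CrystalChat-7B-Web2Code", trust_remote_code=True)

# Generate a short continuation; sampling parameters are illustrative only.
out = pipe("Once upon a time,", max_new_tokens=64, do_sample=True, temperature=0.5)
print(out[0]["generated_text"])
```

The `trust_remote_code=True` flag is needed because the repository ships custom code, including the fast tokenizer whose source is reproduced below.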
```python
import os
from shutil import copyfile
from typing import Optional, Tuple

from tokenizers import processors

from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
from transformers.utils import is_sentencepiece_available, logging
from transformers.utils.versions import require_version

require_version("tokenizers>=0.13.3")

logger = logging.get_logger(__name__)

VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model", "tokenizer_file": "tokenizer.json"}

# fmt: off
DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your \
answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure\
 that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not \
correct. If you don't know the answer to a question, please don't share false information."""
# fmt: on

class CrystalCoderTokenizerFast(PreTrainedTokenizerFast):
    vocab_files_names = VOCAB_FILES_NAMES
    slow_tokenizer_class = None
    padding_side = "left"
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file=None,
        tokenizer_file=None,
        clean_up_tokenization_spaces=False,
        unk_token="<|unk|>",
        bos_token="<|startoftext|>",
        eos_token="<|endoftext|>",
        add_bos_token=False,
        add_eos_token=False,
        use_default_system_prompt=False,
        **kwargs,
    ):
        super().__init__(
            vocab_file=vocab_file,
            tokenizer_file=tokenizer_file,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            use_default_system_prompt=use_default_system_prompt,
            **kwargs,
        )
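        # bos/eos insertion is handled by the tokenizers post-processor, so the flags are
        # stored privately and exposed as properties below; update_post_processor() keeps
        # the TemplateProcessing template in sync whenever they change.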
        self._add_bos_token = add_bos_token
        self._add_eos_token = add_eos_token
        self.update_post_processor()
        self.use_default_system_prompt = use_default_system_prompt
        self.vocab_file = vocab_file

    @property
    def can_save_slow_tokenizer(self) -> bool:
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

    def update_post_processor(self):
        """
        Updates the underlying post processor with the current `bos_token` and `eos_token`.
        """
        bos = self.bos_token
        bos_token_id = self.bos_token_id
        if bos is None and self.add_bos_token:
            raise ValueError("add_bos_token = True but bos_token = None")

        eos = self.eos_token
        eos_token_id = self.eos_token_id
        if eos is None and self.add_eos_token:
            raise ValueError("add_eos_token = True but eos_token = None")

        single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
        pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"

        special_tokens = []
        if self.add_bos_token:
            special_tokens.append((bos, bos_token_id))
        if self.add_eos_token:
            special_tokens.append((eos, eos_token_id))
        self._tokenizer.post_processor = processors.TemplateProcessing(
            single=single, pair=pair, special_tokens=special_tokens
        )
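        # Example: with add_bos_token=True and add_eos_token=False the templates above are
        #   single = "<|startoftext|>:0 $A:0"
        #   pair   = "<|startoftext|>:0 $A:0 <|startoftext|>:1 $B:1"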

    @property
    def add_eos_token(self):
        return self._add_eos_token

    @property
    def add_bos_token(self):
        return self._add_bos_token

    @add_eos_token.setter
    def add_eos_token(self, value):
        self._add_eos_token = value
        self.update_post_processor()

    @add_bos_token.setter
    def add_bos_token(self, value):
        self._add_bos_token = value
        self.update_post_processor()

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not self.can_save_slow_tokenizer:
            raise ValueError(
                "Your fast tokenizer does not have the necessary information to save the vocabulary for a slow "
                "tokenizer."
            )

        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file):
            copyfile(self.vocab_file, out_vocab_file)

        return (out_vocab_file,)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output
```
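Once the remote code is trusted, this tokenizer loads through `AutoTokenizer` like any other fast tokenizer. A minimal usage sketch, assuming the repository config keeps the class defaults shown above; the prompt is arbitrary and the behaviour follows from the property setters:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("LLM360/CrystalChat-7B-Web2Code", trust_remote_code=True)

# With the class default add_bos_token=False, no <|startoftext|> id is prepended here.
print(tokenizer("Hello world").input_ids)

# Assigning the property re-runs update_post_processor(), so the next encoding
# starts with the <|startoftext|> id.
tokenizer.add_bos_token = True
print(tokenizer("Hello world").input_ids)
```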