Text Generation
Transformers
Safetensors
PyTorch
mistral
smarter
code
chemistry
biology
finance
legal
art
Mixture of Experts
Merge
text-generation-inference
music
climate
medical
673_trillion_parameters
Instructions to use ZeppelinCorp/Charm_15 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ZeppelinCorp/Charm_15 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="ZeppelinCorp/Charm_15")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("ZeppelinCorp/Charm_15")
model = AutoModelForCausalLM.from_pretrained("ZeppelinCorp/Charm_15")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use ZeppelinCorp/Charm_15 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "ZeppelinCorp/Charm_15"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ZeppelinCorp/Charm_15",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/ZeppelinCorp/Charm_15
- SGLang
How to use ZeppelinCorp/Charm_15 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "ZeppelinCorp/Charm_15" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ZeppelinCorp/Charm_15",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "ZeppelinCorp/Charm_15" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ZeppelinCorp/Charm_15",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use ZeppelinCorp/Charm_15 with Docker Model Runner:
docker model run hf.co/ZeppelinCorp/Charm_15
| import os | |
| import sentencepiece as spm | |
| from transformers import AutoTokenizer, PreTrainedTokenizerFast | |
class TokenizerSetup:
    """Train, load, save and try out a tokenizer.

    Two modes are supported, selected by ``hf_model``:

    * ``hf_model`` set: the tokenizer is fetched with
      ``AutoTokenizer.from_pretrained(hf_model)``.
    * ``hf_model`` unset: a SentencePiece model is trained to / loaded from
      ``<model_path>.model``.

    Every method reports problems by printing a message and returning,
    rather than raising.
    """

    def __init__(self, model_path="tokenizer", model_type="bpe", vocab_size=32000, hf_model=None):
        """Store the configuration; no tokenizer is loaded yet.

        Args:
            model_path: Prefix of the SentencePiece files (``<prefix>.model``,
                ``<prefix>.vocab``).
            model_type: One of ``bpe``, ``unigram``, ``char``, ``word``
                (case-insensitive). Anything else falls back to ``bpe``
                after printing a warning.
            vocab_size: Vocabulary size passed to the SentencePiece trainer.
            hf_model: Optional Hugging Face model id/path; when set it takes
                precedence over the SentencePiece files in ``load_tokenizer``.
        """
        self.model_path = model_path
        self.model_type = model_type.lower()  # Normalize: bpe, unigram, char, word
        self.vocab_size = vocab_size
        self.hf_model = hf_model
        self.tokenizer = None

        # Validate model_type
        valid_types = ["bpe", "unigram", "char", "word"]
        if self.model_type not in valid_types:
            print(f"⚠️ Invalid model_type '{self.model_type}'. Choose from {valid_types}")
            self.model_type = "bpe"

    def train_sentencepiece(self, input_file):
        """Train a SentencePiece model on ``input_file``.

        The trainer is asked to write ``<model_path>.model``. Nothing is
        returned; a missing corpus or a trainer error is printed and
        swallowed.
        """
        if not os.path.exists(input_file):
            print(f"⚠️ Input file {input_file} not found! Provide a valid text corpus.")
            return
        try:
            spm.SentencePieceTrainer.Train(
                f"--input={input_file} "
                f"--model_prefix={self.model_path} "
                f"--vocab_size={self.vocab_size} "
                f"--model_type={self.model_type} "
                f"--pad_id=0 --unk_id=1 --bos_id=2 --eos_id=3 "
                # Name the four reserved ids so they match the special-token
                # strings used in load_tokenizer. Listing them under
                # --user_defined_symbols instead would register the same
                # pieces a second time, separate from the reserved ids.
                f"--pad_piece=<pad> --unk_piece=<unk> --bos_piece=<bos> --eos_piece=<eos>"
            )
            print(f"✅ Trained SentencePiece tokenizer. Saved as {self.model_path}.model")
        except Exception as e:
            print(f"⚠️ Error training SentencePiece: {e}")

    def load_tokenizer(self):
        """Populate ``self.tokenizer`` from Hugging Face or from SentencePiece.

        With ``hf_model`` set the pretrained tokenizer is loaded; otherwise
        ``<model_path>.model`` must already exist. On any failure a message
        is printed and ``self.tokenizer`` is left unchanged.
        """
        try:
            if self.hf_model:
                self.tokenizer = AutoTokenizer.from_pretrained(self.hf_model)
                print(f"✅ Loaded Hugging Face tokenizer from {self.hf_model}")
            else:
                sp_model = f"{self.model_path}.model"
                if not os.path.exists(sp_model):
                    print(f"⚠️ {sp_model} not found! Train it first.")
                    return
                sp = spm.SentencePieceProcessor(model_file=sp_model)
                # NOTE(review): PreTrainedTokenizerFast documents
                # ``tokenizer_object`` as a ``tokenizers.Tokenizer``; handing
                # it a SentencePieceProcessor may not work -- confirm, and
                # convert the model first if needed.
                self.tokenizer = PreTrainedTokenizerFast(
                    tokenizer_object=sp,
                    pad_token="<pad>",
                    unk_token="<unk>",
                    bos_token="<bos>",
                    eos_token="<eos>"
                )
                print(f"✅ Loaded SentencePiece tokenizer from {sp_model}")
        except Exception as e:
            print(f"⚠️ Error loading tokenizer: {e}")

    def save_tokenizer(self, save_dir="tokenizer/"):
        """Write the loaded tokenizer into ``save_dir`` (created if missing).

        For a custom (non-Hugging-Face) tokenizer the SentencePiece
        ``.model`` / ``.vocab`` files are copied alongside when they exist.
        Errors are printed, not raised.
        """
        import shutil  # local import: only this method needs it

        # Compare against None: tokenizer objects may define __len__, which
        # would make a plain truthiness test unreliable.
        if self.tokenizer is None:
            print("⚠️ No tokenizer loaded to save!")
            return
        try:
            os.makedirs(save_dir, exist_ok=True)
            self.tokenizer.save_pretrained(save_dir)
            if not self.hf_model:  # Copy SentencePiece files
                for ext in [".model", ".vocab"]:
                    src = f"{self.model_path}{ext}"
                    if os.path.exists(src):
                        # shutil.copy instead of shelling out to `cp`: works
                        # with spaces in paths and on platforms without cp.
                        try:
                            shutil.copy(src, save_dir)
                        except shutil.SameFileError:
                            # The file already lives in save_dir; nothing to copy.
                            pass
            print(f"✅ Tokenizer saved to {save_dir}")
        except Exception as e:
            print(f"⚠️ Error saving tokenizer: {e}")

    def tokenize_text(self, text, return_tensors=True):
        """Tokenize ``text``, print the ids and their decoding, return the encoding.

        Args:
            text: Text handed to the tokenizer.
            return_tensors: When true the tokenizer is called with
                ``return_tensors="pt"``; otherwise with ``None``.

        Returns:
            Whatever the tokenizer call returned, or ``None`` when no
            tokenizer is loaded or tokenization fails.
        """
        if self.tokenizer is None:
            print("⚠️ No tokenizer initialized! Load or train one first.")
            return None
        try:
            tokens = self.tokenizer(text, return_tensors="pt" if return_tensors else None)
            # Always pull the ids out of the encoding; decoding the encoding
            # object itself is what previously broke return_tensors=False.
            ids = tokens["input_ids"]
            # In tensor mode the first row is decoded; without tensors the
            # ids are decoded as returned.
            sequence = ids[0] if return_tensors else ids
            decoded = self.tokenizer.decode(sequence, skip_special_tokens=True)
            print(f"🔹 Token IDs: {ids}")
            print(f"🔹 Decoded: {decoded}")
            return tokens
        except Exception as e:
            print(f"⚠️ Error tokenizing text: {e}")
            return None
if __name__ == "__main__":
    # End-to-end demo: train (if needed) -> load -> save -> tokenize a sample.

    # Corpus to train on and directory that receives the finished tokenizer;
    # adjust both to your own dataset / training layout.
    corpus_path = "../datasets/eclipse_corpuz_1.1.txt"
    output_dir = "../finetuned_charm15/"
    demo_sentence = "Charm 15 is an AI model optimized for deep learning and security."

    # hf_model=None means custom SentencePiece training; set it to
    # "mistralai/Mixtral-8x7B-Instruct-v0.1" to use the pretrained tokenizer.
    settings = {
        "model_path": "tokenizer",
        "model_type": "bpe",
        "vocab_size": 32000,
        "hf_model": None,
    }
    setup = TokenizerSetup(**settings)

    # Skip training when a model file from an earlier run is already present.
    trained_model = f"{setup.model_path}.model"
    already_trained = os.path.exists(trained_model)
    if not already_trained:
        setup.train_sentencepiece(corpus_path)

    setup.load_tokenizer()
    setup.save_tokenizer(output_dir)
    setup.tokenize_text(demo_sentence)