Add inference notebook
Browse files
- inference_examples.ipynb +2 -10
inference_examples.ipynb
CHANGED
|
@@ -4,15 +4,7 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"id": "intro",
|
| 6 |
"metadata": {},
|
| 7 |
-
"source":
|
| 8 |
-
"# programming-language-identification-100plus\n",
|
| 9 |
-
"\n",
|
| 10 |
-
"Runnable examples for the ModernBERT programming-language identifier.\n",
|
| 11 |
-
"Covers 107 languages. Input is truncated to the first 512 characters\n",
|
| 12 |
-
"(matches the training-time `head` strategy).\n",
|
| 13 |
-
"\n",
|
| 14 |
-
"Point `MODEL_ID` at the local checkpoint directory or the HF repo id."
|
| 15 |
-
]
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"cell_type": "code",
|
|
@@ -20,7 +12,7 @@
|
|
| 20 |
"id": "setup",
|
| 21 |
"metadata": {},
|
| 22 |
"outputs": [],
|
| 23 |
-
"source": "import torch\nfrom transformers import AutoModelForSequenceClassification, AutoTokenizer\n\nMODEL_ID = \"/
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"cell_type": "markdown",
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"id": "intro",
|
| 6 |
"metadata": {},
|
| 7 |
+
"source": "# programming-language-identification-100plus\n\nRunnable examples for the ModernBERT programming-language identifier.\nCovers 107 languages. Input is truncated to the first 512 characters\n(matches the training-time `head` strategy).\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
},
|
| 9 |
{
|
| 10 |
"cell_type": "code",
|
|
|
|
| 12 |
"id": "setup",
|
| 13 |
"metadata": {},
|
| 14 |
"outputs": [],
|
| 15 |
+
"source": "import torch\nfrom transformers import AutoModelForSequenceClassification, AutoTokenizer\n\nMODEL_ID = \"FrameByFrame/programming-language-identification-100plus\"\nDEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n\ntokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\nmodel = AutoModelForSequenceClassification.from_pretrained(\n MODEL_ID,\n attn_implementation=\"eager\",\n torch_dtype=torch.bfloat16,\n).to(DEVICE).eval()\n\nprint(f\"device={DEVICE} num_labels={model.config.num_labels} dtype={model.dtype}\")\n"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"cell_type": "markdown",
|