# Python snippet: 8 lines, 188 B
import time

import requests

# Benchmark LLM inference: POST one fixed completion request to the local
# /v1/completions endpoint and record how long the round trip takes.
COMPLETIONS_URL = "http://localhost:1234/v1/completions"

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever if the server accepts the connection but never answers. The connect
# limit is short so a server that is not running fails fast; the read limit is
# generous because the reply only arrives once generation has finished.
REQUEST_TIMEOUT = (5, 300)

# perf_counter is a monotonic, high-resolution clock intended for measuring
# short durations; it is unaffected by system clock adjustments.
start = time.perf_counter()
response = requests.post(
    COMPLETIONS_URL,
    json={
        "prompt": "Hello, how are you?",
        "max_tokens": 100,
    },
    timeout=REQUEST_TIMEOUT,
)
# Wall-clock seconds spent on the request, including network and generation.
elapsed_seconds = time.perf_counter() - start