venv + uvicorn: Python virtual server not reachable from the external network
Set up the virtual environment, install the dependencies, and run the start script:
python -m venv .venv
source ./.venv/bin/activate
pip install -r requirements.txt
./run.sh

Or start the server by hand:
source ./.venv/bin/activate
uvicorn main:app --reload
Started this way, uvicorn binds to 127.0.0.1 and is only reachable from the local machine. To make the virtual web server accessible from the external network, launch it from the console with the following command (0.0.0.0 listens on all interfaces):
uvicorn main:app --host 0.0.0.0 --port 8501 --reload
To run it in the background:
nohup uvicorn main:app --host 0.0.0.0 --port 8501 --reload &
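To confirm the port is actually reachable from another machine, a quick check with Python's standard library works. This is a minimal sketch; SERVER_IP is a placeholder for the virtual server's address, and it calls the /models endpoint defined in main.py below:

import urllib.request

SERVER_IP = "192.168.0.1"  # placeholder; replace with the server's real address

with urllib.request.urlopen(f"http://{SERVER_IP}:8501/models", timeout=5) as resp:
    print(resp.status)           # 200 means the port is reachable from outside
    print(resp.read().decode())  # the hard-coded model list returned by main.py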
main.py
import json
import xmltodict
import anthropic
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse
from prompt_constructors import *
from claude import ClaudeLlm  # import the ClaudeLlm class from claude.py
import os
import base64

app = FastAPI()

# Prefer reading the key from the environment instead of hard coding it
api_key = os.environ.get("ANTHROPIC_API_KEY", "your key")
client = anthropic.Anthropic(api_key=api_key)
# @app.middleware("http")
# async def log_body(request: Request, call_next):
#     body = await request.body()
#     print("HTTP REQUEST BODY: ", body)
#     return await call_next(request)
# The anthropic API does not have a method to list models, so we are hard coding the models here
@app.get("/models")
async def list_models() -> JSONResponse:
    return JSONResponse(content={"data": [
        {"id": "claude-3-sonnet-20240229", "name": "Anthropic Claude 3 Sonnet"},
        {"id": "anthropic.claude-3-sonnet-20240229-v1:0", "name": "AWS Bedrock Anthropic Claude 3 Sonnet"},
        {"id": "claude-3-opus-20240229", "name": "Anthropic Claude 3 Opus"},
    ]})
def map_req(req: dict) -> dict:
    messages = req["messages"]
    mapped_req = {
        "messages": messages,
    }
    return mapped_req
@app.post("/chat/completions")
async def completions(request: Request) -> StreamingResponse:
    data = await request.body()
    req = map_req(json.loads(data))
    messages = req["messages"]
    claude = ClaudeLlm(client, messages)
    resp = claude.generate_responses("claude-3-opus-20240229")
    return StreamingResponse(resp, media_type="application/x-ndjson")
def map_resp(response) -> str:
    data = json.loads(response)
    finish_reason = None
    parsed_tool_calls = []
    for message in data["content"]:
        if 'text' in message.keys() and message["text"].startswith("<function_calls>"):
            xml_tool_calls = message["text"] + "</function_calls>"
            tool_calls = xmltodict.parse(xml_tool_calls)
            invocations = tool_calls["function_calls"]["invoke"]
            # xmltodict returns a list when there are multiple <invoke> blocks,
            # otherwise a single dict; normalize to a list before iterating
            if not isinstance(invocations, list):
                invocations = [invocations]
            for invocation in invocations:
                parsed_tool_calls.append({
                    "index": 0,
                    "id": invocation["tool_name"],
                    "type": "function",
                    "function": {
                        "name": invocation["tool_name"],
                        "arguments": json.dumps(invocation["parameters"]),
                    },
                })
            message.pop("text", None)
            message.pop("type", None)
            message["tool_calls"] = parsed_tool_calls
            message["content"] = None
            message["role"] = "assistant"
        if 'text' in message.keys():
            message["content"] = message["text"]
    if "stop_reason" in data.keys() and data["stop_reason"] == "stop_sequence":
        finish_reason = "tool_calls"
    if "stop_reason" in data.keys() and data["stop_reason"] == "end_turn":
        finish_reason = "stop"
    translated = {
        "id": data["id"],
        "object": "chat.completion.chunk",
        "created": 0,
        "model": data["model"],
        "system_fingerprint": "TEMP",
        "choices": [
            {
                "index": 0,
                "delta": data["content"][0],
                # OpenAI-style stream chunks carry finish_reason inside each choice
                "finish_reason": finish_reason,
            },
        ],
    }
    return json.dumps(translated)
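For reference, a client for the /chat/completions endpoint can consume the streamed chunks like this. This is a minimal sketch, assuming the requests package is installed and the server is running locally on port 8501:

import json
import requests

payload = {"messages": [{"role": "user", "content": "Hello, who are you?"}]}

with requests.post(
    "http://127.0.0.1:8501/chat/completions",
    data=json.dumps(payload),
    headers={"Content-Type": "application/json"},
    stream=True,
) as resp:
    # the endpoint streams the raw text chunks yielded by ClaudeLlm.generate_responses
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)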
claude.py
import pandas as pd
from forex_python.converter import CurrencyRates
import time
import anthropic
from anthropic.types.message_stream_event import MessageStartEvent, MessageDeltaEvent, ContentBlockDeltaEvent
class ClaudeLlm:
    def __init__(self, client, user_input):
        self.client = client
        self.user_input = user_input  # list of chat messages ({"role": ..., "content": ...})
        # 総計_円換算 = total converted to JPY, 処理時間 = elapsed time
        self.cost_df = pd.DataFrame(columns=["Model", "Input Tokens", "Output Tokens", "Input Cost", "Output Cost", "Total Cost", "総計_円換算", "処理時間"])
    def convert_usd_to_jpy(self, usd_amount):
        c = CurrencyRates()
        try:
            rate = c.get_rate('USD', 'JPY')
            jpy_rate = f"為替レート: {rate:.2f}円/ドル"
            return usd_amount * rate, jpy_rate
        except Exception as e:
            rate = 150  # fallback exchange rate used when the lookup fails
            jpy_rate = f"為替レート: {rate:.2f}円/ドル想定"
            return usd_amount * rate, jpy_rate
    def calculate_cost(self, model, input_tokens, output_tokens):
        # USD cost per token (input / output) for each supported model
        token_costs = {
            "claude-3-opus-20240229": {"input": 0.000015, "output": 0.000075},
            "claude-3-sonnet-20240229": {"input": 0.000003, "output": 0.000015},
        }
        model_costs = token_costs[model]
        input_cost = input_tokens * model_costs["input"]
        output_cost = output_tokens * model_costs["output"]
        total_cost = input_cost + output_cost
        return input_cost, output_cost, total_cost
    def generate_responses(self, model_name):
        start_time = time.time()
        input_tokens = 0
        output_tokens = 0
        try:
            with self.client.messages.stream(
                model=model_name,
                max_tokens=1024,
                # self.user_input is already a list of message dicts (see main.py)
                messages=self.user_input,
            ) as stream:
                for event in stream:
                    if isinstance(event, MessageStartEvent):
                        usage_info = event.message.usage
                        input_tokens = usage_info.input_tokens
                    elif isinstance(event, MessageDeltaEvent):
                        output_tokens = event.usage.output_tokens
                    elif isinstance(event, ContentBlockDeltaEvent):
                        yield event.delta.text
        except anthropic.APIStatusError as e:
            error_response = e.response.json()
            if 'error' in error_response and error_response['error'].get('type') == 'overloaded_error':
                # this is a generator, so the error message must be yielded, not returned
                yield "APIが過負荷状態です。しばらくしてから再試行してください。"
                return
        input_cost, output_cost, total_cost = self.calculate_cost(model_name, input_tokens, output_tokens)
        jpy_total_cost, _ = self.convert_usd_to_jpy(total_cost)
        end_time = time.time()
        response_time = end_time - start_time
        new_row = {
            "Model": model_name,
            "Input Tokens": input_tokens,
            "Output Tokens": output_tokens,
            "Input Cost": f"${input_cost:.6f}",
            "Output Cost": f"${output_cost:.6f}",
            "Total Cost": f"${total_cost:.6f}",
            "総計_円換算": f"¥{jpy_total_cost:.3f}",
            "処理時間": f"{response_time:.2f}秒"
        }
        new_row_df = pd.DataFrame([new_row])
        self.cost_df = pd.concat([self.cost_df, new_row_df], ignore_index=True)
        return self.cost_df
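ClaudeLlm can also be exercised on its own, without going through FastAPI. This is a minimal sketch, assuming ANTHROPIC_API_KEY is set in the environment:

import os
import anthropic
from claude import ClaudeLlm

client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
messages = [{"role": "user", "content": "Summarize FastAPI in one sentence."}]

claude = ClaudeLlm(client, messages)
for chunk in claude.generate_responses("claude-3-sonnet-20240229"):
    print(chunk, end="", flush=True)
print()
print(claude.cost_df)  # per-request cost summary accumulated after the stream finishes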
A generic curl POST for testing reachability from another machine (the address, port, and payload here are placeholders):
curl -H "Content-Type: application/json" -X POST -d '{"user_id": "123", "coin":100, "success":1, "msg":"OK!"}' "http://192.168.0.1:8001/test"
If the port still cannot be reached, check what is actually listening and which process holds the port:
netstat -ntulp
lsof -i:8000