Fix linter issues
Signed-off-by: kingbri <bdashore3@proton.me>
parent 083df7d585
commit eb8ccb9783

3 changed files with 22 additions and 12 deletions
auth.py (+1 −1)

@@ -37,7 +37,7 @@ def load_auth_keys():
             api_key = auth_keys_dict["api_key"],
             admin_key = auth_keys_dict["admin_key"]
         )
-    except:
+    except Exception as _:
         new_auth_keys = AuthKeys(
             api_key = secrets.token_hex(16),
             admin_key = secrets.token_hex(16)
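The auth.py fix replaces a bare except: with except Exception as _: — the standard remedy for flake8's E722 / pylint's bare-except warning. A bare except also traps BaseException subclasses such as KeyboardInterrupt and SystemExit, so Ctrl+C can be silently swallowed. A minimal, self-contained sketch of the difference (not from this repo):

# Why linters flag bare "except": it catches BaseException subclasses
# (KeyboardInterrupt, SystemExit), not just ordinary errors.

def swallow_everything():
    try:
        raise KeyboardInterrupt
    except:  # bare except (E722): catches even KeyboardInterrupt
        return "caught"

def swallow_errors_only():
    try:
        raise KeyboardInterrupt
    except Exception:  # lets KeyboardInterrupt/SystemExit propagate
        return "caught"

print(swallow_everything())  # prints "caught" -- a Ctrl+C would be eaten here

try:
    swallow_errors_only()
except KeyboardInterrupt:
    print("KeyboardInterrupt propagated as expected")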
main.py (+6 −5)

@@ -1,15 +1,18 @@
 import uvicorn
 import yaml
 import pathlib
-import gen_logging
 from asyncio import CancelledError
-from auth import check_admin_key, check_api_key, load_auth_keys
 from fastapi import FastAPI, Request, HTTPException, Depends
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse
-from model import ModelContainer
 from progress.bar import IncrementalBar
 from typing import Optional
 from uuid import uuid4
+
+import gen_logging
+from auth import check_admin_key, check_api_key, load_auth_keys
+from generators import generate_with_semaphore
+from model import ModelContainer
 from OAI.types.completion import CompletionRequest
 from OAI.types.chat_completion import ChatCompletionRequest
+from OAI.types.lora import LoraCard, LoraList, LoraLoadRequest, LoraLoadResponse

@@ -28,9 +31,7 @@ from OAI.utils import (
     create_chat_completion_response,
     create_chat_completion_stream_chunk
 )
-from typing import Optional
 from utils import get_generator_error, get_sse_packet, load_progress, unwrap
-from uuid import uuid4
 
 app = FastAPI()
 
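The main.py hunks deduplicate imports (typing.Optional and uuid4 were imported twice) and regroup them in the PEP 8 order that tools such as isort and pylint enforce: standard library first, then third-party packages, then local modules, separated by blank lines. A sketch of the resulting layout, assuming the project's own modules (gen_logging, model) are importable:

# 1) standard library
import pathlib
from typing import Optional
from uuid import uuid4

# 2) third-party packages
import uvicorn
from fastapi import FastAPI

# 3) local application modules
import gen_logging
from model import ModelContainer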
model.py (+15 −6)

@@ -1,4 +1,6 @@
-import gc, time, pathlib
+import gc
+import pathlib
+import time
 import torch
 from exllamav2 import(
     ExLlamaV2,

@@ -12,9 +14,10 @@ from exllamav2.generator import(
     ExLlamaV2StreamingGenerator,
     ExLlamaV2Sampler
 )
+
+from gen_logging import log_generation_params, log_prompt, log_response
 from typing import List, Optional, Union
 from utils import coalesce, unwrap
-from gen_logging import log_generation_params, log_prompt, log_response
 
 # Bytes to reserve on first device when loading with auto split
 auto_split_reserve_bytes = 96 * 1024**2

@@ -147,7 +150,8 @@ class ModelContainer:
             progress_callback (function, optional): A function to call for each module loaded. Prototype:
             def progress(loaded_modules: int, total_modules: int)
         """
-        for _ in self.load_gen(progress_callback): pass
+        for _ in self.load_gen(progress_callback):
+            pass
 
     def load_loras(self, lora_directory: pathlib.Path, **kwargs):
         """

@@ -243,10 +247,14 @@ class ModelContainer:
 
         # Unload the entire model if not just unloading loras
         if not loras_only:
-            if self.model: self.model.unload()
+            if self.model:
+                self.model.unload()
             self.model = None
-            if self.draft_model: self.draft_model.unload()
+
+            if self.draft_model:
+                self.draft_model.unload()
             self.draft_model = None
+
             self.config = None
             self.cache = None
             self.tokenizer = None

@@ -440,7 +448,8 @@ class ModelContainer:
                 chunk_buffer = ""
                 last_chunk_time = now
 
-            if eos or generated_tokens == max_tokens: break
+            if eos or generated_tokens == max_tokens:
+                break
 
             # Print response
             log_response(full_response)
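The model.py changes clear two related lint complaints: multiple imports on one line (flake8 E401 / pylint C0410) and compound statements, where the body sits on the same line as the if or for (E701). A standalone before/after sketch with illustrative names:

# Before (rejected by the linter):
#   import gc, time, pathlib            # E401: multiple imports on one line
#   if model: model.unload()            # E701: compound statement
#   for _ in load_gen(): pass           # E701 again

# After: one import and one statement per line.
import gc
import pathlib
import time

class Model:
    def unload(self):
        print("unloaded")

model = Model()

if model:          # truthy check, body moved to its own line
    model.unload()

for _ in range(3):
    pass

gc.collect()  # touch the imports so they are not flagged as unused
print(pathlib.Path.cwd(), time.time())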