An unoptimized, very large OpenWebText-based LLM dedicated to what-you-know.
How to use:
import torch
import torch.nn as nn

# Pick the GPU when one is available, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = None
# load your model with whatever you want.
# even torch.load()
# NOTE(review): torch.load on an untrusted checkpoint unpickles arbitrary
# code — only load files you trust.
kirin = model.to(device)
# Put the network in eval mode so dropout/batch-norm behave
# deterministically during generation.
kirin.eval()

prompt = input()
# encode/decode are the tokenizer hooks; they must be defined (or imported)
# by the caller — presumably str -> list[int] and back. TODO confirm.
context_matrix = torch.tensor(encode(prompt), dtype=torch.long, device=device)
# Generation is pure inference: disable gradient tracking to avoid
# building an autograd graph and wasting memory.
with torch.no_grad():
    # unsqueeze(0) adds the batch dimension expected by generate;
    # [0] takes the single sequence back out before decoding.
    response = decode(kirin.generate(context_matrix.unsqueeze(0), max_new_tokens=150)[0].tolist())