mirror of
https://github.com/openai/gpt-2-output-dataset
synced 2025-08-22 09:58:07 +00:00
63 lines
1.9 KiB
Python
63 lines
1.9 KiB
Python
import sys
|
|
from functools import reduce
|
|
|
|
from torch import nn
|
|
import torch.distributed as dist
|
|
|
|
|
|
def summary(model: nn.Module, file=sys.stdout):
    """Print a torch-style tree of *model* annotated with parameter counts.

    Args:
        model: the module to summarize.
        file: ``sys.stdout`` (default, count rendered in ANSI green), an open
            file-like object, a path string (opened, written, and closed), or
            ``None`` to skip printing entirely.

    Returns:
        Total number of parameters in *model* (int).
    """
    def _module_repr(module):
        # We treat the extra repr like the sub-module, one item per line
        extra_lines = []
        extra_repr = module.extra_repr()
        # empty string would be split into [''], so only split when non-empty
        if extra_repr:
            extra_lines = extra_repr.split('\n')

        child_lines = []
        total_params = 0
        for key, child in module._modules.items():
            mod_str, num_params = _module_repr(child)
            mod_str = nn.modules.module._addindent(mod_str, 2)
            child_lines.append('(' + key + '): ' + mod_str)
            total_params += num_params
        lines = extra_lines + child_lines

        # _parameters can hold None entries; only count real tensors
        for p in module._parameters.values():
            if hasattr(p, 'shape'):
                total_params += reduce(lambda x, y: x * y, p.shape)

        main_str = module._get_name() + '('
        if lines:
            # simple one-liner info, which most builtin Modules will use
            if len(extra_lines) == 1 and not child_lines:
                main_str += extra_lines[0]
            else:
                main_str += '\n ' + '\n '.join(lines) + '\n'

        main_str += ')'
        if file is sys.stdout:
            # ANSI green highlight for terminal output
            main_str += ', \033[92m{:,}\033[0m params'.format(total_params)
        else:
            main_str += ', {:,} params'.format(total_params)
        return main_str, total_params

    string, count = _module_repr(model)
    if file is not None:
        if isinstance(file, str):
            # fix: the original opened the path and never closed the handle
            with open(file, 'w') as f:
                print(string, file=f)
        else:
            print(string, file=file)
            file.flush()
    return count
|
|
|
|
|
|
def grad_norm(model: nn.Module):
    """Return the global L2 norm of all parameter gradients in *model*.

    Computed as sqrt(sum over parameters of ||p.grad||_2 ** 2).

    Parameters whose ``.grad`` is None (frozen params, or before any
    backward pass) are skipped; the original raised AttributeError on them.
    """
    total_sq = 0.0
    for p in model.parameters():
        # fix: skip parameters that have no gradient populated
        if p.grad is None:
            continue
        total_sq += p.grad.data.norm(2).item() ** 2
    return total_sq ** 0.5
|
|
|
|
def distributed():
    """Return True when torch.distributed is both available and initialized."""
    if not dist.is_available():
        return False
    return dist.is_initialized()
|