mirror of
https://github.com/openai/gpt-2-output-dataset
synced 2025-08-22 01:51:41 +00:00
update the download URLs to Azure CDN
This commit is contained in:
parent
d6f4e2956b
commit
2c102400c7
@ -12,13 +12,13 @@ For motivations and discussions regarding the release of this detector model, pl
|
||||
Download the weights for the fine-tuned `roberta-base` model (478 MB):
|
||||
|
||||
```bash
|
||||
wget https://storage.googleapis.com/gpt-2/detector-models/v1/detector-base.pt
|
||||
wget https://openaipublic.azureedge.net/gpt-2/detector-models/v1/detector-base.pt
|
||||
```
|
||||
|
||||
or `roberta-large` model (1.5 GB):
|
||||
|
||||
```bash
|
||||
wget https://storage.googleapis.com/gpt-2/detector-models/v1/detector-large.pt
|
||||
wget https://openaipublic.azureedge.net/gpt-2/detector-models/v1/detector-large.pt
|
||||
```
|
||||
|
||||
These RoBERTa-based models are fine-tuned with a mixture of temperature-1 and nucleus sampling outputs,
|
||||
|
@ -30,7 +30,7 @@ def download(*datasets, data_dir='data'):
|
||||
if os.path.isfile(output_file):
|
||||
continue
|
||||
|
||||
r = requests.get("https://storage.googleapis.com/gpt-2/output-dataset/v1/" + filename, stream=True)
|
||||
r = requests.get("https://openaipublic.azureedge.net/gpt-2/output-dataset/v1/" + filename, stream=True)
|
||||
|
||||
with open(output_file, 'wb') as f:
|
||||
file_size = int(r.headers["content-length"])
|
||||
|
@ -17,7 +17,7 @@ for ds in [
|
||||
]:
|
||||
for split in ['train', 'valid', 'test']:
|
||||
filename = ds + "." + split + '.jsonl'
|
||||
r = requests.get("https://openaipublic.blob.core.windows.net/gpt-2/output-dataset/v1/" + filename, stream=True)
|
||||
r = requests.get("https://openaipublic.azureedge.net/gpt-2/output-dataset/v1/" + filename, stream=True)
|
||||
|
||||
with open(os.path.join(subdir, filename), 'wb') as f:
|
||||
file_size = int(r.headers["content-length"])
|
||||
|
Loading…
x
Reference in New Issue
Block a user