Skip to content

Commit 6e1036d

Browse files
authored
Release 1.6.0
2 parents 1ed6ad4 + 9447636 commit 6e1036d

23 files changed

+197
-84
lines changed

Jenkinsfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ node('cuda-module') {
1919
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1
2020
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py38 py39
2121
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1
22-
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py310
22+
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py310 py311
2323
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1 || exit 0
2424
"""
2525
currentBuild.result = 'SUCCESS'

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[![License Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
2-
![Python 3.6, 3.7, 3.8, 3.9, 3.10](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10-green.svg)
2+
![Python 3.6, 3.7, 3.8, 3.9, 3.10, 3.11](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-green.svg)
33
[![Downloads](https://pepy.tech/badge/deeppavlov)](https://pepy.tech/project/deeppavlov)
44
<img align="right" height="27%" width="27%" src="docs/_static/deeppavlov_logo.png"/>
55

deeppavlov/_meta.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = '1.5.0'
1+
__version__ = '1.6.0'
22
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
33
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
44
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
{
2+
"chainer": {
3+
"in": ["x"],
4+
"in_y": ["y"],
5+
"pipe": [
6+
{
7+
"class_name": "torch_transformers_ner_preprocessor",
8+
"vocab_file": "{BASE_MODEL}",
9+
"in": ["x"],
10+
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask", "tokens_offsets"]
11+
},
12+
{
13+
"id": "tag_vocab",
14+
"class_name": "simple_vocab",
15+
"unk_token": ["O"],
16+
"save_path": "{MODEL_PATH}/tag.dict",
17+
"load_path": "{MODEL_PATH}/tag.dict",
18+
"fit_on": ["y"],
19+
"in": ["y"],
20+
"out": ["y_ind"]
21+
},
22+
{
23+
"class_name": "torch_transformers_sequence_tagger",
24+
"n_tags": "#tag_vocab.len",
25+
"pretrained_bert": "{BASE_MODEL}",
26+
"save_path": "{MODEL_PATH}/model",
27+
"load_path": "{MODEL_PATH}/model",
28+
"in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"],
29+
"in_y": ["y_ind"],
30+
"out": ["y_pred_ind", "probas"]
31+
},
32+
{
33+
"ref": "tag_vocab",
34+
"in": ["y_pred_ind"],
35+
"out": ["y_pred"]
36+
}
37+
],
38+
"out": ["x_tokens", "y_pred"]
39+
},
40+
"metadata": {
41+
"variables": {
42+
"BASE_MODEL": "bert-base-multilingual-cased",
43+
"ROOT_PATH": "~/.deeppavlov",
44+
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
45+
"MODELS_PATH": "{ROOT_PATH}/models",
46+
"MODEL_PATH": "{MODELS_PATH}/ner/{BASE_MODEL}"
47+
},
48+
"download": [
49+
{
50+
"url": "http://files.deeppavlov.ai/v1/ner/ner_bert_base.tar.gz",
51+
"subdir": "{MODEL_PATH}"
52+
}
53+
]
54+
}
55+
}

deeppavlov/core/data/utils.py

+59-51
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def s3_download(url: str, destination: str) -> None:
7878
file_object.download_file(destination, Callback=pbar.update)
7979

8080

81-
def simple_download(url: str, destination: Union[Path, str], headers: Optional[dict] = None) -> None:
81+
def simple_download(url: str, destination: Union[Path, str], headers: Optional[dict] = None, n_tries: int = 3) -> None:
    """Download a file from URL to target location.

    Displays a progress bar to the terminal during the download process.

    Args:
        url: The source URL.
        destination: Path to the file destination (including file name).
        headers: Headers for file server. The mapping is copied and never modified.
        n_tries: Number of retries if download fails, i.e. the download is attempted
            at most ``n_tries + 1`` times.

    Raises:
        RuntimeError: If the server answers with a non-200 status code or does not
            support resuming a partial download, and no retries are left.

    """
    destination = Path(destination)
    retries_left = n_tries
    # A loop instead of recursion: every attempt starts from a clean state and the
    # exception raised by the last attempt propagates with its original traceback.
    while True:
        try:
            return _simple_download_attempt(url, destination, headers)
        except Exception as e:
            if retries_left <= 0:
                raise
            log.warning(f'Download failed: {e}, retrying')
            retries_left -= 1


def _simple_download_attempt(url: str, destination: Path, headers: Optional[dict] = None) -> None:
    """Perform a single download attempt, resuming from an existing ``.part`` file if there is one."""
    destination.parent.mkdir(parents=True, exist_ok=True)

    log.info('Downloading from {} to {}'.format(url, destination))

    if url.startswith('s3://'):
        return s3_download(url, str(destination))

    # Private copy: the Range header added while resuming must neither fail on
    # headers=None nor leak into the caller's dict (and thus into the next attempt,
    # where it would turn the initial response into a 206 and fail the status check).
    request_headers = dict(headers or {})
    chunk_size = 32 * 1024
    temporary = destination.with_suffix(destination.suffix + '.part')

    r = requests.get(url, stream=True, headers=request_headers)
    try:
        if r.status_code != 200:
            raise RuntimeError(f'Got status code {r.status_code} when trying to download {url}')
        total_length = int(r.headers.get('content-length', 0))

        if temporary.exists() and temporary.stat().st_size > total_length:
            temporary.write_bytes(b'')  # clearing temporary file when total_length is inconsistent

        with temporary.open('ab') as f:
            downloaded = f.tell()
            if downloaded != 0:
                log.warning(f'Found a partial download {temporary}')
            with tqdm(initial=downloaded, total=total_length, unit='B', unit_scale=True) as pbar:
                while True:
                    if downloaded != 0:
                        log.warning(f'Download stopped abruptly, trying to resume from {downloaded} '
                                    f'to reach {total_length}')
                        request_headers['Range'] = f'bytes={downloaded}-'
                        r.close()  # release the connection of the response being replaced
                        r = requests.get(url, headers=request_headers, stream=True)
                        if 'content-length' not in r.headers or \
                                total_length - downloaded != int(r.headers['content-length']):
                            raise RuntimeError('It looks like the server does not support resuming downloads.')

                    try:
                        for chunk in r.iter_content(chunk_size=chunk_size):
                            if chunk:  # filter out keep-alive new chunks
                                downloaded += len(chunk)
                                pbar.update(len(chunk))
                                f.write(chunk)
                    except requests.exceptions.ChunkedEncodingError:
                        if downloaded == 0:
                            # Nothing was received yet, so there is no offset to resume from:
                            # simply start the request over.
                            r.close()
                            r = requests.get(url, stream=True, headers=request_headers)

                    if downloaded >= total_length:
                        # Note that total_length is 0 if the server didn't return the content length,
                        # in this case we perform just one iteration and assume that we are done.
                        break
    finally:
        r.close()

    temporary.rename(destination)
142150

143151

144152
def download(dest_file_path: [List[Union[str, Path]]], source_url: str, force_download: bool = True,

deeppavlov/requirements/datasets.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
datasets>=1.16.0,<2.5.0
1+
datasets>=1.16.0,<2.5.0;python_version<="3.10"
2+
datasets==2.2.*;python_version=="3.11.*"

deeppavlov/requirements/faiss.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
faiss-cpu==1.7.2
1+
faiss-cpu==1.7.2;python_version<="3.10"
2+
faiss-cpu==1.7.4;python_version=="3.11.*"

deeppavlov/requirements/kenlm.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
pypi-kenlm==0.1.20220713
1+
pypi-kenlm==0.1.20220713;python_version<="3.10"
2+
kenlm==0.2.*;python_version=="3.11.*"

docs/features/models/KBQA.ipynb

+3-3
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@
2222
" \n",
2323
" 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
2424
"\n",
25-
" 4.3. [Using entity linking and Wiki parser as standalone services for KBQA](#4.3-Using-entity-linking-and-Wiki-parser-as-standalone-services-for-KBQA)\n",
25+
" 4.3. [Using entity linking and Wiki parser as standalone services for KBQA](#4.3-Using-entity-linking-and-Wiki-parser-as-standalone-tools-for-KBQA)\n",
2626
" \n",
2727
"5. [Customize the model](#5.-Customize-the-model)\n",
2828
" \n",
29-
" 5.1. [Train your model from Python](#5.1-Train-your-model-from-Python)\n",
29+
" 5.1. [Description of config parameters](#5.1-Description-of-config-parameters)\n",
3030
" \n",
31-
" 5.2. [Train your model from CLI](#5.2-Train-your-model-from-CLI)\n",
31+
" 5.2. [Train KBQA components](#5.2-Train-KBQA-components)\n",
3232
"\n",
3333
"# 1. Introduction to the task\n",
3434
"\n",

docs/features/models/NER.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
" \n",
2323
" 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
2424
" \n",
25-
"5. [Evaluate](#6.-Evaluate)\n",
25+
"5. [Evaluate](#5.-Evaluate)\n",
2626
" \n",
2727
" 5.1. [Evaluate from Python](#5.1-Evaluate-from-Python)\n",
2828
" \n",

docs/features/models/SQuAD.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@
105105
"`squad_bert` is the name of the model's *config_file*. [What is a Config File?](http://docs.deeppavlov.ai/en/master/intro/configuration.html) \n",
106106
"\n",
107107
"Configuration file defines the model and describes its hyperparameters. To use another model, change the name of the *config_file* here and further.\n",
108-
"The full list of the models with their config names can be found in the [table](#6.-Models-list).\n",
108+
"The full list of the models with their config names can be found in the [table](#3.-Models-list).\n",
109109
"\n",
110110
"# 3. Models list\n",
111111
"\n",

docs/features/models/classification.ipynb

+7-7
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@
162162
"cell_type": "markdown",
163163
"metadata": {},
164164
"source": [
165-
"## 3.2 Predict using CLI\n",
165+
"## 4.2 Predict using CLI\n",
166166
"\n",
167167
"You can also get predictions in an interactive mode through CLI (Command Line Interface)."
168168
]
@@ -198,9 +198,9 @@
198198
"cell_type": "markdown",
199199
"metadata": {},
200200
"source": [
201-
"# 4. Evaluation\n",
201+
"# 5. Evaluation\n",
202202
"\n",
203-
"## 4.1 Evaluate from Python"
203+
"## 5.1 Evaluate from Python"
204204
]
205205
},
206206
{
@@ -218,7 +218,7 @@
218218
"cell_type": "markdown",
219219
"metadata": {},
220220
"source": [
221-
"## 4.2 Evaluate from CLI"
221+
"## 5.2 Evaluate from CLI"
222222
]
223223
},
224224
{
@@ -234,9 +234,9 @@
234234
"cell_type": "markdown",
235235
"metadata": {},
236236
"source": [
237-
"# 5. Customize the model\n",
237+
"# 6. Train the model on your data\n",
238238
"\n",
239-
"## 5.1 Train your model from Python\n",
239+
"## 6.1 Train your model from Python\n",
240240
"\n",
241241
"### Provide your data path\n",
242242
"\n",
@@ -346,7 +346,7 @@
346346
"cell_type": "markdown",
347347
"metadata": {},
348348
"source": [
349-
"## 5.2 Train your model from CLI\n",
349+
"## 6.2 Train your model from CLI\n",
350350
"\n",
351351
"To train the model on your data, create a copy of a config file and change the *data_path* variable in it. After that, train the model using your new *config_file*. You can also change any of the hyperparameters of the model."
352352
]

docs/features/models/few_shot_classification.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@
119119
"\n",
120120
"## 4.2 Predict using Python\n",
121121
"\n",
122-
"After [installing](#4.-Get-started-with-the-model) the model, build it from the config and predict."
122+
"After [installing](#2.-Get-started-with-the-model) the model, build it from the config and predict."
123123
]
124124
},
125125
{
@@ -192,7 +192,7 @@
192192
"cell_type": "markdown",
193193
"metadata": {},
194194
"source": [
195-
"## 4.2 Predict using CLI\n",
195+
"## 4.3 Predict using CLI\n",
196196
"\n",
197197
"You can also get predictions in an interactive mode through CLI (Сommand Line Interface)."
198198
]

docs/features/models/morpho_tagger.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"\n",
2323
" 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
2424
"\n",
25-
"5. [Customize the model](#4.-Customize-the-model)\n",
25+
"5. [Customize the model](#5.-Customize-the-model)\n",
2626
"\n",
2727
"# 1. Introduction to the task\n",
2828
"\n",

docs/features/models/relation_extraction.ipynb

+28-2
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@
198198
"|NUM | Percents, money, quantities |\n",
199199
"|MISC | Products, including vehicles, weapons, etc. <br> Events, including elections, battles, sporting MISC events, etc. Laws, cases, languages, etc. |\n",
200200
"\n",
201-
"**Model Output**: one or several of the [97 relations](#5.1-Relations-used-in-English-model) found between the given entities; relation id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P26') and relation name ('spouse').\n",
201+
"**Model Output**: one or several of the [97 relations](#6.1-Relations-used-in-English-model) found between the given entities; relation id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P26') and relation name ('spouse').\n",
202202
"\n",
203203
"### Russian"
204204
]
@@ -244,8 +244,34 @@
244244
"- list of entities positions (i.e. all start and end positions of both entities' mentions)\n",
245245
"- list of NER tags of both entities.\n",
246246
"\n",
247-
"**Model Output**: one or several of the [30 relations](#5.2-Relations-used-in-Russian-model) found between the given entities; a Russian relation name (e.g. \"участник\") or an English one, if Russian one is unavailable, and, if applicable, its id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P710').\n",
247+
"**Model Output**: one or several of the [30 relations](#6.2-Relations-used-in-Russian-model) found between the given entities; a Russian relation name (e.g. \"участник\") or an English one, if Russian one is unavailable, and, if applicable, its id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P710').\n",
248248
"\n",
249+
"## 4.2 Predict using CLI\n",
250+
"\n",
251+
"You can also get predictions in an interactive mode through CLI."
252+
]
253+
},
254+
{
255+
"cell_type": "code",
256+
"execution_count": null,
257+
"metadata": {},
258+
"outputs": [],
259+
"source": [
260+
"! python -m deeppavlov interact re_docred [-d]\n",
261+
"! python -m deeppavlov interact re_rured [-d]"
262+
]
263+
},
264+
{
265+
"cell_type": "markdown",
266+
"metadata": {},
267+
"source": [
268+
"`-d` is an optional download key (alternative to `download=True` in Python code). It is used to download the pre-trained model along with embeddings and all other files needed to run the model."
269+
]
270+
},
271+
{
272+
"cell_type": "markdown",
273+
"metadata": {},
274+
"source": [
249275
"# 5. Customize the model\n",
250276
"\n",
251277
"## 5.1 Description of config parameters\n",

docs/features/models/spelling_correction.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"\n",
2323
" 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
2424
"\n",
25-
"5. [Customize the model](#4.-Customize-the-model)\n",
25+
"5. [Customize the model](#5.-Customize-the-model)\n",
2626
"\n",
2727
" 5.1. [Training configuration](#5.1-Training-configuration)\n",
2828
"\n",

docs/features/models/syntax_parser.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"\n",
2323
" 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
2424
"\n",
25-
"5. [Customize the model](#4.-Customize-the-model)\n",
25+
"5. [Customize the model](#5.-Customize-the-model)\n",
2626
"\n",
2727
"# 1. Introduction to the task\n",
2828
"\n",

0 commit comments

Comments
 (0)