Skip to content

Commit ff532a1

Browse files
committed
[RELEASE] iText pdfOCR 4.1.1
2 parents d559048 + d2d2cf1 commit ff532a1

File tree

137 files changed

+777
-358
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

137 files changed

+777
-358
lines changed

pdfocr-api/pom.xml

Lines changed: 9 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,17 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>4.1.0</version>
8+
<version>4.1.1</version>
99
</parent>
1010

11+
<properties>
12+
<sharpen.phase>install</sharpen.phase>
13+
14+
<sharpen.projectName>pdfocr-api</sharpen.projectName>
15+
<sharpen.cSharpTargetFolder>./../../../sharp/pdfocr</sharpen.cSharpTargetFolder>
16+
<sharpen.cSharpSourceCodeDestination>itext/itext.pdfocr.api</sharpen.cSharpSourceCodeDestination>
17+
<sharpen.cSharpTestCodeDestination>itext.tests/itext.pdfocr.api.tests</sharpen.cSharpTestCodeDestination>
18+
</properties>
1119
<artifactId>pdfocr-api</artifactId>
1220

1321
<name>pdfOCR API</name>
@@ -50,48 +58,4 @@
5058
</resource>
5159
</resources>
5260
</build>
53-
54-
<profiles>
55-
<profile>
56-
<id>with-sharpen</id>
57-
<build>
58-
<plugins>
59-
<plugin>
60-
<groupId>sharpen</groupId>
61-
<artifactId>sharpen-maven-plugin</artifactId>
62-
<version>1.0-SNAPSHOT</version>
63-
<executions>
64-
<execution>
65-
<phase>install</phase>
66-
<goals>
67-
<goal>sharpen</goal>
68-
</goals>
69-
</execution>
70-
</executions>
71-
<dependencies>
72-
<dependency>
73-
<groupId>sharpen</groupId>
74-
<artifactId>standard-framework-mapping</artifactId>
75-
<version>1.0-SNAPSHOT</version>
76-
</dependency>
77-
</dependencies>
78-
<configuration>
79-
<projectName>pdfocr-api</projectName>
80-
<cSharpTargetFolder>./../../../sharp/pdfocr</cSharpTargetFolder>
81-
<cSharpSourceCodeDestination>itext/itext.pdfocr.api</cSharpSourceCodeDestination>
82-
<cSharpTestCodeDestination>itext.tests/itext.pdfocr.api.tests</cSharpTestCodeDestination>
83-
<buildDotnet>${sharpen.builddotnet}</buildDotnet>
84-
<showDiff>${sharpen.showdiff}</showDiff>
85-
<sourceCodeFiles>
86-
<file>**/src/main/java/**/*.java</file>
87-
</sourceCodeFiles>
88-
<testCodeFiles>
89-
<file>**/src/test/java/**/*.java</file>
90-
</testCodeFiles>
91-
</configuration>
92-
</plugin>
93-
</plugins>
94-
</build>
95-
</profile>
96-
</profiles>
9761
</project>

pdfocr-api/src/main/java/com/itextpdf/pdfocr/TextInfo.java

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -40,11 +40,6 @@ public class TextInfo {
4040

4141
/**
4242
* {@link Rectangle} describing text bbox (lower-left based) expressed in points.
43-
*
44-
* <p>
45-
* TODO DEVSIX-9153: mark this on breaking changes page. Why not return rectangles in image pixels?
46-
* Seems odd, that an OCR engine should be concerned by PDF specific. It would make sense for an engine
47-
* to return results, which could be directly applied to images inputs instead.
4843
*/
4944
private Rectangle bboxRect;
5045

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

pdfocr-onnxtr/pom.xml

Lines changed: 7 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -5,14 +5,13 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>4.1.0</version>
8+
<version>4.1.1</version>
99
</parent>
1010

1111
<artifactId>pdfocr-onnxtr</artifactId>
1212

1313
<name>pdfOCR-OnnxTR</name>
1414
<description>pdfOCR-OnnxTR is an iText add-on for Java to recognize and extract text in scanned documents and images. It can also convert them into fully ISO-compliant PDF or PDF/A-3u files that are accessible, searchable, and suitable for archiving</description>
15-
1615
<dependencies>
1716
<dependency>
1817
<groupId>com.itextpdf</groupId>
@@ -42,6 +41,12 @@
4241
<sonar.exclusions>
4342
**com/itextpdf/pdfocr/onnxtr/recognition/Vocabulary.java
4443
</sonar.exclusions>
44+
<sharpen.phase>install</sharpen.phase>
45+
<sharpen.projectName>pdfocr-onnxtr</sharpen.projectName>
46+
<sharpen.cSharpTargetFolder>./../../../sharp/pdfocr</sharpen.cSharpTargetFolder>
47+
<sharpen.cSharpSourceCodeDestination>itext/itext.pdfocr.onnxtr</sharpen.cSharpSourceCodeDestination>
48+
<sharpen.cSharpTestCodeDestination>itext.tests/itext.pdfocr.onnxtr.tests</sharpen.cSharpTestCodeDestination>
49+
4550
</properties>
4651

4752
<build>
@@ -56,63 +61,6 @@
5661
</build>
5762

5863
<profiles>
59-
<profile>
60-
<id>qa</id>
61-
<build>
62-
<plugins>
63-
<!-- By default, the japicmp plugin is enabled for qa profile and configured from parent pom.
64-
Disabling it for the new module as it could not find previous version for pdfocr-onnxtr. -->
65-
<plugin>
66-
<groupId>com.github.siom79.japicmp</groupId>
67-
<artifactId>japicmp-maven-plugin</artifactId>
68-
<configuration>
69-
<skip>true</skip>
70-
</configuration>
71-
</plugin>
72-
</plugins>
73-
</build>
74-
</profile>
75-
<profile>
76-
<id>with-sharpen</id>
77-
<build>
78-
<plugins>
79-
<plugin>
80-
<groupId>sharpen</groupId>
81-
<artifactId>sharpen-maven-plugin</artifactId>
82-
<version>1.0-SNAPSHOT</version>
83-
<executions>
84-
<execution>
85-
<phase>install</phase>
86-
<goals>
87-
<goal>sharpen</goal>
88-
</goals>
89-
</execution>
90-
</executions>
91-
<dependencies>
92-
<dependency>
93-
<groupId>sharpen</groupId>
94-
<artifactId>standard-framework-mapping</artifactId>
95-
<version>1.0-SNAPSHOT</version>
96-
</dependency>
97-
</dependencies>
98-
<configuration>
99-
<projectName>pdfocr-onnxtr</projectName>
100-
<cSharpTargetFolder>./../../../sharp/pdfocr</cSharpTargetFolder>
101-
<cSharpSourceCodeDestination>itext/itext.pdfocr.onnxtr</cSharpSourceCodeDestination>
102-
<cSharpTestCodeDestination>itext.tests/itext.pdfocr.onnxtr.tests</cSharpTestCodeDestination>
103-
<buildDotnet>${sharpen.builddotnet}</buildDotnet>
104-
<showDiff>${sharpen.showdiff}</showDiff>
105-
<sourceCodeFiles>
106-
<file>**/src/main/java/**/*.java</file>
107-
</sourceCodeFiles>
108-
<testCodeFiles>
109-
<file>**/src/test/java/**/*.java</file>
110-
</testCodeFiles>
111-
</configuration>
112-
</plugin>
113-
</plugins>
114-
</build>
115-
</profile>
11664
</profiles>
11765

11866
</project>

pdfocr-onnxtr/src/main/java/com/itextpdf/pdfocr/onnxtr/OnnxInputProperties.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -47,12 +47,12 @@ public class OnnxInputProperties {
4747
public static final int EXPECTED_SHAPE_SIZE = 4;
4848

4949
/**
50-
* Per-channel mean, used for normalization. Should be EXPECTED_SHAPE_SIZE length.
50+
* Per-channel mean, used for normalization. Should be EXPECTED_CHANNEL_COUNT length.
5151
*/
5252
private final float[] mean;
5353

5454
/**
55-
* Per-channel standard deviation, used for normalization. Should be EXPECTED_SHAPE_SIZE length.
55+
* Per-channel standard deviation, used for normalization. Should be EXPECTED_CHANNEL_COUNT length.
5656
*/
5757
private final float[] std;
5858

@@ -69,8 +69,8 @@ public class OnnxInputProperties {
6969
/**
7070
* Creates model input properties.
7171
*
72-
* @param mean per-channel mean, used for normalization. Should be EXPECTED_SHAPE_SIZE length
73-
* @param std per-channel standard deviation, used for normalization. Should be EXPECTED_SHAPE_SIZE length
72+
* @param mean per-channel mean, used for normalization. Should be EXPECTED_CHANNEL_COUNT length
73+
* @param std per-channel standard deviation, used for normalization. Should be EXPECTED_CHANNEL_COUNT length
7474
* @param shape target input shape. Should be EXPECTED_SHAPE_SIZE length
7575
* @param symmetricPad whether padding should be symmetrical during input resizing
7676
*/

0 commit comments

Comments
 (0)