Merge branch 'merge-pcre' into 10.1

d233fd14 · Oleksandr Byelkin · 4fc8961d · c1291d7a · d233fd14 · d233fd14
Commit d233fd14 authored Apr 30, 2020 by Oleksandr Byelkin
24 changed files
--- a/pcre/AUTHORS
+++ b/pcre/AUTHORS
@@ -8,7 +8,7 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England.

-Copyright (c) 1997-2019 University of Cambridge
+Copyright (c) 1997-2020 University of Cambridge
 All rights reserved


@@ -19,7 +19,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Emain domain:     freemail.hu

-Copyright(c) 2010-2019 Zoltan Herczeg
+Copyright(c) 2010-2020 Zoltan Herczeg
 All rights reserved.


@@ -30,7 +30,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Emain domain:     freemail.hu

-Copyright(c) 2009-2019 Zoltan Herczeg
+Copyright(c) 2009-2020 Zoltan Herczeg
 All rights reserved.



--- a/pcre/ChangeLog
+++ b/pcre/ChangeLog
@@ -5,6 +5,35 @@ Note that the PCRE 8.xx series (PCRE1) is now in a bugfix-only state. All
 development is happening in the PCRE2 10.xx series.


+Version 8.44 12-February-2020
+-----------------------------
+
+1. Setting --enable-jit=auto for an out-of-tree build failed because the
+source directory wasn't in the search path for AC_TRY_COMPILE always. Patch
+from Ross Burton.
+
+2. Applied a patch from Michael Shigorin to fix 8.43 build on e2k arch
+with lcc compiler (EDG frontend based); the problem it fixes is:
+
+  lcc: "pcrecpp.cc", line 74: error: declaration aliased to undefined entity
+       "_ZN7pcrecpp2RE6no_argE" [-Werror]
+
+3. Change 2 for 8.43 omitted (*LF) from the list of start-of-pattern items. Now
+added.
+
+4. Fix ARMv5 JIT improper handling of labels right after a constant pool.
+
+5. Small patch to pcreposix.c to set the erroroffset field to -1 immediately
+after a successful compile, instead of at the start of matching to avoid a
+sanitizer complaint (regexec is supposed to be thread safe).
+
+6. Check the size of the number after (?C as it is read, in order to avoid
+integer overflow.
+
+7. Tidy up left shifts to avoid sanitizer warnings; also fix one NULL
+dereference in pcretest.
+
+
 Version 8.43 23-February-2019
 -----------------------------


--- a/pcre/LICENCE
+++ b/pcre/LICENCE
@@ -25,7 +25,7 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England.

-Copyright (c) 1997-2019 University of Cambridge
+Copyright (c) 1997-2020 University of Cambridge
 All rights reserved.


@@ -36,7 +36,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu

-Copyright(c) 2010-2019 Zoltan Herczeg
+Copyright(c) 2010-2020 Zoltan Herczeg
 All rights reserved.


@@ -47,7 +47,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu

-Copyright(c) 2009-2019 Zoltan Herczeg
+Copyright(c) 2009-2020 Zoltan Herczeg
 All rights reserved.



--- a/pcre/NEWS
+++ b/pcre/NEWS
@@ -5,6 +5,12 @@ Note that this library (now called PCRE1) is now being maintained for bug fixes
 only. New projects are advised to use the new PCRE2 libraries.


+Release 8.44 12-February-2020
+-----------------------------
+
+This is a bug-fix release.
+
+
 Release 8.43 23-February-2019
 -----------------------------


--- a/pcre/README
+++ b/pcre/README
@@ -13,9 +13,10 @@ libraries.
 The latest release of PCRE1 is always available in three alternative formats
 from:

-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.bz2
-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
+  https://ftp.pcre.org/pub/pcre/pcre-x.xx.tar.gz
+  https://ftp.pcre.org/pub/pcre/pcre-x.xx.tar.bz2
+  https://ftp.pcre.org/pub/pcre/pcre-x.xx.zip
+

 There is a mailing list for discussion about the development of PCRE at
 pcre-dev@exim.org. You can access the archives and subscribe or manage your
@@ -999,4 +1000,4 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-Last updated: 10 February 2015
+Last updated: 12 February 2020
--- a/pcre/configure.ac
+++ b/pcre/configure.ac
@@ -9,19 +9,19 @@ dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
 dnl be defined as -RC2, for example. For real releases, it should be empty.

 m4_define(pcre_major, [8])
-m4_define(pcre_minor, [43])
+m4_define(pcre_minor, [44])
 m4_define(pcre_prerelease, [])
-m4_define(pcre_date, [2019-02-23])
+m4_define(pcre_date, [2020-02-12])

 # NOTE: The CMakeLists.txt file searches for the above variables in the first
 # 50 lines of this file. Please update that if the variables above are moved.

 # Libtool shared library interface versions (current:revision:age)
-m4_define(libpcre_version, [3:11:2])
-m4_define(libpcre16_version, [2:11:2])
-m4_define(libpcre32_version, [0:11:0])
-m4_define(libpcreposix_version, [0:6:0])
-m4_define(libpcrecpp_version, [0:1:0])
+m4_define(libpcre_version, [3:12:2])
+m4_define(libpcre16_version, [2:12:2])
+m4_define(libpcre32_version, [0:12:0])
+m4_define(libpcreposix_version, [0:7:0])
+m4_define(libpcrecpp_version, [0:2:0])

 AC_PREREQ(2.57)
 AC_INIT(PCRE, pcre_major.pcre_minor[]pcre_prerelease, , pcre)
@@ -159,12 +159,15 @@ AC_ARG_ENABLE(jit,

 if test "$enable_jit" = "auto"; then
  AC_LANG(C)
+  SAVE_CPPFLAGS=$CPPFLAGS
+  CPPFLAGS=-I$srcdir
  AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
  #define SLJIT_CONFIG_AUTO 1
  #include "sljit/sljitConfigInternal.h"
  #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
  #error unsupported
  #endif]])], enable_jit=yes, enable_jit=no)
+  CPPFLAGS=$SAVE_CPPFLAGS
 fi

 # Handle --disable-pcregrep-jit (enabled by default)

--- a/pcre/doc/html/README.txt
+++ b/pcre/doc/html/README.txt
@@ -13,9 +13,10 @@ libraries.
 The latest release of PCRE1 is always available in three alternative formats
 from:

-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.bz2
-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
+  https://ftp.pcre.org/pub/pcre/pcre-x.xx.tar.gz
+  https://ftp.pcre.org/pub/pcre/pcre-x.xx.tar.bz2
+  https://ftp.pcre.org/pub/pcre/pcre-x.xx.zip
+

 There is a mailing list for discussion about the development of PCRE at
 pcre-dev@exim.org. You can access the archives and subscribe or manage your
@@ -999,4 +1000,4 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-Last updated: 10 February 2015
+Last updated: 12 February 2020
--- a/pcre/doc/html/pcre.html
+++ b/pcre/doc/html/pcre.html
@@ -143,7 +143,7 @@ performance.
 One way of guarding against this possibility is to use the
 <b>pcre_fullinfo()</b> function to check the compiled pattern's options for UTF.
 Alternatively, from release 8.33, you can set the PCRE_NEVER_UTF option at
-compile time. This causes an compile time error if a pattern contains a
+compile time. This causes a compile time error if a pattern contains a
 UTF-setting sequence.
 </P>
 <P>

--- a/pcre/doc/html/pcreapi.html
+++ b/pcre/doc/html/pcreapi.html
@@ -1246,7 +1246,7 @@ the following negative numbers:
  PCRE_ERROR_BADOPTION      the value of <i>what</i> was invalid
  PCRE_ERROR_UNSET          the requested field is not set
 </pre>
-The "magic number" is placed at the start of each compiled pattern as an simple
+The "magic number" is placed at the start of each compiled pattern as a simple
 check against passing an arbitrary memory pointer. The endianness error can
 occur if a compiled pattern is saved and reloaded on a different host. Here is
 a typical call of <b>pcre_fullinfo()</b>, to obtain the length of the compiled
@@ -1318,7 +1318,7 @@ returned. For anchored patterns, -2 is returned.
 </pre>
 Return the value of the first data unit (non-UTF character) of any matched
 string in the situation where PCRE_INFO_FIRSTCHARACTERFLAGS returns 1;
-otherwise return 0. The fourth argument should point to an <b>uint_t</b>
+otherwise return 0. The fourth argument should point to a <b>uint_t</b>
 variable.
 </P>
 <P>
@@ -1577,7 +1577,7 @@ returned value 1 (with "z" returned from PCRE_INFO_REQUIREDCHAR), but for
 </pre>
 Return the value of the rightmost literal data unit that must exist in any
 matched string, other than at its start, if such a value has been recorded. The
-fourth argument should point to an <b>uint32_t</b> variable. If there is no such
+fourth argument should point to a <b>uint32_t</b> variable. If there is no such
 value, 0 is returned.
 </P>
 <br><a name="SEC16" href="#TOC1">REFERENCE COUNTS</a><br>

--- a/pcre/doc/html/pcretest.html
+++ b/pcre/doc/html/pcretest.html
@@ -99,23 +99,21 @@ the 16-bit library, or <b>pcre32_xx</b> when using the 32-bit library".
 <br><a name="SEC4" href="#TOC1">COMMAND LINE OPTIONS</a><br>
 <P>
 <b>-8</b>
-If both the 8-bit library has been built, this option causes the 8-bit library
-to be used (which is the default); if the 8-bit library has not been built,
-this option causes an error.
+If the 8-bit library has been built, this option causes it to be used (this is
+the default). If the 8-bit library has not been built, this option causes an
+error.
 </P>
 <P>
 <b>-16</b>
-If both the 8-bit or the 32-bit, and the 16-bit libraries have been built, this
-option causes the 16-bit library to be used. If only the 16-bit library has been
-built, this is the default (so has no effect). If only the 8-bit or the 32-bit
-library has been built, this option causes an error.
+If the 16-bit library has been built, this option causes it to be used. If only
+the 16-bit library has been built, this is the default. If the 16-bit library
+has not been built, this option causes an error.
 </P>
 <P>
 <b>-32</b>
-If both the 8-bit or the 16-bit, and the 32-bit libraries have been built, this
-option causes the 32-bit library to be used. If only the 32-bit library has been
-built, this is the default (so has no effect). If only the 8-bit or the 16-bit
-library has been built, this option causes an error.
+If the 32-bit library has been built, this option causes it to be used. If only
+the 32-bit library has been built, this is the default. If the 32-bit library
+has not been built, this option causes an error.
 </P>
 <P>
 <b>-b</b>
@@ -1154,9 +1152,9 @@ Cambridge CB2 3QH, England.
 </P>
 <br><a name="SEC17" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 23 February 2017
+Last updated: 10 February 2020
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2020 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE index page</a>.

--- a/pcre/doc/pcre-config.txt
+++ b/pcre/doc/pcre-config.txt
@@ -16,8 +16,8 @@ DESCRIPTION

       pcre-config  returns  the configuration of the installed PCRE libraries
       and the options required to compile a program to use them. Some of  the
-       options  apply  only  to  the  8-bit,  or  16-bit, or 32-bit libraries,
-       respectively, and are not available if only one of those libraries  has
+       options  apply  only  to the 8-bit, or 16-bit, or 32-bit libraries, re-
+       spectively, and are not available if only one of  those  libraries  has
       been built. If an unavailable option is encountered, the "usage" infor-
       mation is output.

@@ -36,37 +36,37 @@ OPTIONS
       --version Writes the version number of the installed PCRE libraries  to
                 the standard output.

-       --libs    Writes  to  the  standard  output  the  command  line options
-                 required to link with the 8-bit PCRE library (-lpcre on  many
+       --libs    Writes  to  the  standard output the command line options re-
+                 quired to link with the 8-bit PCRE library  (-lpcre  on  many
                 systems).

-       --libs16  Writes  to  the  standard  output  the  command  line options
-                 required to link with the 16-bit PCRE  library  (-lpcre16  on
-                 many systems).
+       --libs16  Writes  to  the  standard output the command line options re-
+                 quired to link with the 16-bit PCRE library (-lpcre16 on many
+                 systems).

-       --libs32  Writes  to  the  standard  output  the  command  line options
-                 required to link with the 32-bit PCRE  library  (-lpcre32  on
-                 many systems).
+       --libs32  Writes  to  the  standard output the command line options re-
+                 quired to link with the 32-bit PCRE library (-lpcre32 on many
+                 systems).

       --libs-cpp
-                 Writes  to  the  standard  output  the  command  line options
-                 required to link with PCRE's C++ wrapper  library  (-lpcrecpp
+                 Writes  to  the  standard output the command line options re-
+                 quired to link with PCRE's  C++  wrapper  library  (-lpcrecpp
                 -lpcre on many systems).

       --libs-posix
-                 Writes  to  the  standard  output  the  command  line options
-                 required to  link  with  PCRE's  POSIX  API  wrapper  library
+                 Writes  to  the  standard output the command line options re-
+                 quired  to  link  with  PCRE's  POSIX  API  wrapper   library
                 (-lpcreposix -lpcre on many systems).

-       --cflags  Writes  to  the  standard  output  the  command  line options
-                 required to compile files that use  PCRE  (this  may  include
-                 some -I options, but is blank on many systems).
+       --cflags  Writes  to  the  standard output the command line options re-
+                 quired to compile files that use PCRE (this may include  some
+                 -I options, but is blank on many systems).

       --cflags-posix
-                 Writes  to  the  standard  output  the  command  line options
-                 required to compile files that use PCRE's POSIX  API  wrapper
-                 library  (this  may  include some -I options, but is blank on
-                 many systems).
+                 Writes  to  the  standard output the command line options re-
+                 quired to compile files that use PCRE's POSIX API wrapper li-
+                 brary (this may include some -I options, but is blank on many
+                 systems).


 SEE ALSO

--- a/pcre/doc/pcre.3
+++ b/pcre/doc/pcre.3
@@ -146,7 +146,7 @@ performance.
 One way of guarding against this possibility is to use the
 \fBpcre_fullinfo()\fP function to check the compiled pattern's options for UTF.
 Alternatively, from release 8.33, you can set the PCRE_NEVER_UTF option at
-compile time. This causes an compile time error if a pattern contains a
+compile time. This causes a compile time error if a pattern contains a
 UTF-setting sequence.
 .P
 If your application is one that supports UTF, be aware that validity checking

--- a/pcre/doc/pcre.txt
+++ b/pcre/doc/pcre.txt
--- a/pcre/doc/pcreapi.3
+++ b/pcre/doc/pcreapi.3
@@ -1227,7 +1227,7 @@ the following negative numbers:
  PCRE_ERROR_BADOPTION      the value of \fIwhat\fP was invalid
  PCRE_ERROR_UNSET          the requested field is not set
 .sp
-The "magic number" is placed at the start of each compiled pattern as an simple
+The "magic number" is placed at the start of each compiled pattern as a simple
 check against passing an arbitrary memory pointer. The endianness error can
 occur if a compiled pattern is saved and reloaded on a different host. Here is
 a typical call of \fBpcre_fullinfo()\fP, to obtain the length of the compiled
@@ -1294,7 +1294,7 @@ returned. For anchored patterns, -2 is returned.
 .sp
 Return the value of the first data unit (non-UTF character) of any matched
 string in the situation where PCRE_INFO_FIRSTCHARACTERFLAGS returns 1;
-otherwise return 0. The fourth argument should point to an \fBuint_t\fP
+otherwise return 0. The fourth argument should point to a \fBuint_t\fP
 variable.
 .P
 In the 8-bit library, the value is always less than 256. In the 16-bit library
@@ -1560,7 +1560,7 @@ returned value 1 (with "z" returned from PCRE_INFO_REQUIREDCHAR), but for
 .sp
 Return the value of the rightmost literal data unit that must exist in any
 matched string, other than at its start, if such a value has been recorded. The
-fourth argument should point to an \fBuint32_t\fP variable. If there is no such
+fourth argument should point to a \fBuint32_t\fP variable. If there is no such
 value, 0 is returned.
 .
 .

--- a/pcre/doc/pcregrep.txt
+++ b/pcre/doc/pcregrep.txt
@@ -26,8 +26,8 @@ DESCRIPTION
       If you attempt to use delimiters (for example, by surrounding a pattern
       with slashes, as is common in Perl scripts), they  are  interpreted  as
       part  of  the pattern. Quotes can of course be used to delimit patterns
-       on the command line because they are  interpreted  by  the  shell,  and
-       indeed  quotes  are required if a pattern contains white space or shell
+       on the command line because they are interpreted by the shell, and  in-
+       deed  quotes  are  required  if a pattern contains white space or shell
       metacharacters.

       The first argument that follows any option settings is treated  as  the
@@ -37,8 +37,8 @@ DESCRIPTION
       or an argument pattern must be provided.

       If no files are specified, pcregrep reads the standard input. The stan-
-       dard  input  can  also  be  referenced by a name consisting of a single
-       hyphen.  For example:
+       dard  input can also be referenced by a name consisting of a single hy-
+       phen.  For example:

         pcregrep some-pattern /file1 - /file3

@@ -47,8 +47,8 @@ DESCRIPTION
       the start of each line, followed by a colon. However, there are options
       that  can  change  how  pcregrep  behaves. In particular, the -M option
       makes it possible to search for patterns  that  span  line  boundaries.
-       What  defines  a  line  boundary  is  controlled  by the -N (--newline)
-       option.
+       What  defines  a  line boundary is controlled by the -N (--newline) op-
+       tion.

       The amount of memory used for buffering files that are being scanned is
       controlled  by a parameter that can be set by the --buffer-size option.
@@ -66,12 +66,12 @@ DESCRIPTION
       By  default, as soon as one pattern matches a line, no further patterns
       are considered. However, if --colour (or --color) is used to colour the
       matching  substrings, or if --only-matching, --file-offsets, or --line-
-       offsets is used to output only  the  part  of  the  line  that  matched
-       (either shown literally, or as an offset), scanning resumes immediately
+       offsets is used to output only the part of the line that  matched  (ei-
+       ther  shown  literally,  or as an offset), scanning resumes immediately
       following the match, so that further matches on the same  line  can  be
-       found.  If  there  are  multiple  patterns,  they  are all tried on the
-       remainder of the line, but patterns that follow the  one  that  matched
-       are not tried on the earlier part of the line.
+       found.  If  there  are multiple patterns, they are all tried on the re-
+       mainder of the line, but patterns that follow the one that matched  are
+       not tried on the earlier part of the line.

       This  behaviour  means  that  the  order in which multiple patterns are
       specified can affect the output when one of the above options is  used.
@@ -80,11 +80,11 @@ DESCRIPTION
       overlap).

       Patterns  that can match an empty string are accepted, but empty string
-       matches   are   never   recognized.   An   example   is   the   pattern
-       "(super)?(man)?",  in  which  all components are optional. This pattern
-       finds all occurrences of both "super" and  "man";  the  output  differs
-       from  matching  with  "super|man" when only the matching substrings are
-       being shown.
+       matches  are  never  recognized.  An  example  is  the  pattern   "(su-
+       per)?(man)?",  in which all components are optional. This pattern finds
+       all occurrences of both "super" and  "man";  the  output  differs  from
+       matching  with  "super|man" when only the matching substrings are being
+       shown.

       If the LC_ALL or LC_CTYPE environment variable is  set,  pcregrep  uses
       the  value to set a locale when calling the PCRE library.  The --locale
@@ -105,9 +105,9 @@ BINARY FILES

       By  default,  a  file that contains a binary zero byte within the first
       1024 bytes is identified as a binary file, and is processed  specially.
-       (GNU  grep  also  identifies  binary  files  in  this  manner.) See the
-       --binary-files option for a means of changing the way binary files  are
-       handled.
+       (GNU  grep  also identifies binary files in this manner.) See the --bi-
+       nary-files option for a means of changing the way binary files are han-
+       dled.


 OPTIONS
@@ -151,16 +151,16 @@ OPTIONS

       --binary-files=word
                 Specify  how binary files are to be processed. If the word is
-                 "binary" (the default),  pattern  matching  is  performed  on
-                 binary  files,  but  the  only  output is "Binary file <name>
+                 "binary" (the default), pattern matching is performed on  bi-
+                 nary  files,  but  the  only  output  is  "Binary file <name>
                 matches" when a match succeeds. If the word is "text",  which
                 is  equivalent  to  the -a or --text option, binary files are
                 processed in the same way as any other file.  In  this  case,
                 when  a  match  succeeds,  the  output may be binary garbage,
                 which can have nasty effects if sent to a  terminal.  If  the
-                 word  is  "without-match",  which  is  equivalent  to  the -I
-                 option, binary files are  not  processed  at  all;  they  are
-                 assumed not to be of interest.
+                 word  is  "without-match",  which is equivalent to the -I op-
+                 tion, binary files are not processed at all; they are assumed
+                 not to be of interest.

       --buffer-size=number
                 Set  the  parameter that controls how much memory is used for
@@ -201,15 +201,15 @@ OPTIONS
                 ronment variable PCREGREP_COLOUR or PCREGREP_COLOR. The value
                 of this variable should be a string of two numbers, separated
                 by a semicolon. They are copied  directly  into  the  control
-                 string  for  setting  colour  on  a  terminal,  so it is your
-                 responsibility to ensure that they make sense. If neither  of
+                 string  for  setting  colour on a terminal, so it is your re-
+                 sponsibility to ensure that they make sense.  If  neither  of
                 the  environment  variables  is  set,  the default is "1;31",
                 which gives red.

       -D action, --devices=action
-                 If an input path is  not  a  regular  file  or  a  directory,
-                 "action"  specifies  how  it is to be processed. Valid values
-                 are "read" (the default) or "skip" (silently skip the path).
+                 If an input path is not a regular file or a  directory,  "ac-
+                 tion"  specifies  how it is to be processed. Valid values are
+                 "read" (the default) or "skip" (silently skip the path).

       -d action, --directories=action
                 If an input path is a directory, "action" specifies how it is
@@ -218,8 +218,8 @@ OPTIONS
                 "recurse"  (equivalent to the -r option), or "skip" (silently
                 skip the path, the default in Windows environments).  In  the
                 "read"  case,  directories  are read as if they were ordinary
-                 files. In some operating systems  the  effect  of  reading  a
-                 directory like this is an immediate end-of-file; in others it
+                 files. In some operating systems the effect of reading a  di-
+                 rectory  like  this is an immediate end-of-file; in others it
                 may provoke an error.

       -e pattern, --regex=pattern, --regexp=pattern
@@ -249,8 +249,8 @@ OPTIONS
                 whether listed on the command  line,  obtained  from  --file-
                 list, or by scanning a directory. The pattern is a PCRE regu-
                 lar expression, and is matched against the final component of
-                 the  file  name,  not  the  entire  path.  The -F, -w, and -x
-                 options do not apply to this pattern. The option may be given
+                 the  file  name,  not the entire path. The -F, -w, and -x op-
+                 tions do not apply to this pattern. The option may  be  given
                 any number of times in order to specify multiple patterns. If
                 a file name matches both an --include and an  --exclude  pat-
                 tern, it is excluded. There is no short form for this option.
@@ -264,29 +264,29 @@ OPTIONS

       --exclude-dir=pattern
                 Directories whose names match the pattern are skipped without
-                 being processed, whatever  the  setting  of  the  --recursive
-                 option.  This  applies  to all directories, whether listed on
-                 the command line, obtained from --file-list, or by scanning a
+                 being processed, whatever the setting of the --recursive  op-
+                 tion.  This applies to all directories, whether listed on the
+                 command line, obtained from --file-list,  or  by  scanning  a
                 parent  directory.  The pattern is a PCRE regular expression,
                 and is matched against the final component of  the  directory
                 name,  not the entire path. The -F, -w, and -x options do not
                 apply to this pattern. The option may be given any number  of
                 times  in order to specify more than one pattern. If a direc-
-                 tory matches both  --include-dir  and  --exclude-dir,  it  is
-                 excluded. There is no short form for this option.
+                 tory matches both --include-dir and --exclude-dir, it is  ex-
+                 cluded. There is no short form for this option.

       -F, --fixed-strings
                 Interpret  each  data-matching  pattern  as  a  list of fixed
-                 strings, separated by  newlines,  instead  of  as  a  regular
-                 expression.  What  constitutes  a newline for this purpose is
-                 controlled by the --newline option. The -w (match as a  word)
-                 and  -x (match whole line) options can be used with -F.  They
-                 apply to each of the fixed strings. A line is selected if any
+                 strings, separated by newlines, instead of as a  regular  ex-
+                 pression. What constitutes a newline for this purpose is con-
+                 trolled by the --newline option. The -w (match as a word) and
+                 -x  (match whole line) options can be used with -F.  They ap-
+                 ply to each of the fixed strings. A line is selected  if  any
                 of the fixed strings are found in it (subject to -w or -x, if
                 present). This option applies only to the patterns  that  are
                 matched  against  the contents of files; it does not apply to
-                 patterns specified by  any  of  the  --include  or  --exclude
-                 options.
+                 patterns specified by any of the --include or  --exclude  op-
+                 tions.

       -f filename, --file=filename
                 Read  patterns  from  the  file, one per line, and match them
@@ -358,16 +358,16 @@ OPTIONS
       --include=pattern
                 If any --include patterns are specified, the only files  that
                 are  processed  are those that match one of the patterns (and
-                 do not match an --exclude  pattern).  This  option  does  not
-                 affect  directories,  but  it  applies  to all files, whether
-                 listed on the command line, obtained from --file-list, or  by
-                 scanning  a  directory. The pattern is a PCRE regular expres-
-                 sion, and is matched against the final component of the  file
-                 name,  not the entire path. The -F, -w, and -x options do not
-                 apply to this pattern. The option may be given any number  of
-                 times.  If  a  file  name  matches  both  an --include and an
-                 --exclude pattern, it is excluded.  There is  no  short  form
-                 for this option.
+                 do not match an --exclude pattern). This option does not  af-
+                 fect directories, but it applies to all files, whether listed
+                 on the command line, obtained from --file-list, or  by  scan-
+                 ning  a  directory. The pattern is a PCRE regular expression,
+                 and is matched against the final component of the file  name,
+                 not  the entire path. The -F, -w, and -x options do not apply
+                 to this pattern. The option may be given any number of times.
+                 If  a  file  name  matches both an --include and an --exclude
+                 pattern, it is excluded.  There is no short form for this op-
+                 tion.

       --include-from=filename
                 Treat  each  non-empty  line  of  the file as the data for an
@@ -381,8 +381,8 @@ OPTIONS
                 tories that are processed are those that  match  one  of  the
                 patterns  (and  do  not match an --exclude-dir pattern). This
                 applies to all directories, whether  listed  on  the  command
-                 line,  obtained  from  --file-list,  or  by scanning a parent
-                 directory. The pattern is a PCRE regular expression,  and  is
+                 line,  obtained from --file-list, or by scanning a parent di-
+                 rectory. The pattern is a PCRE  regular  expression,  and  is
                 matched  against  the  final component of the directory name,
                 not the entire path. The -F, -w, and -x options do not  apply
                 to this pattern. The option may be given any number of times.
@@ -413,9 +413,9 @@ OPTIONS

       --line-buffered
                 When this option is given, input is read and  processed  line
-                 by  line,  and  the  output  is  flushed after each write. By
-                 default, input is read in large chunks, unless  pcregrep  can
-                 determine  that  it is reading from a terminal (which is cur-
+                 by  line,  and the output is flushed after each write. By de-
+                 fault, input is read in large chunks, unless pcregrep can de-
+                 termine  that  it  is  reading from a terminal (which is cur-
                 rently possible only in Unix-like  environments).  Output  to
                 terminal  is  normally automatically flushed by the operating
                 system. This option can be useful when the input or output is
@@ -437,9 +437,9 @@ OPTIONS
       --locale=locale-name
                 This  option specifies a locale to be used for pattern match-
                 ing. It overrides the value in the LC_ALL or  LC_CTYPE  envi-
-                 ronment  variables.  If  no  locale  is  specified,  the PCRE
-                 library's default (usually the "C" locale) is used. There  is
-                 no short form for this option.
+                 ronment  variables.  If  no locale is specified, the PCRE li-
+                 brary's default (usually the "C" locale) is used. There is no
+                 short form for this option.

       --match-limit=number
                 Processing  some  regular  expression  patterns can require a
@@ -447,26 +447,26 @@ OPTIONS
                 gram  crash  if  not enough is available.  Other patterns may
                 take a very long time to search  for  all  possible  matching
                 strings.  The pcre_exec() function that is called by pcregrep
-                 to do the matching has two  parameters  that  can  limit  the
-                 resources that it uses.
+                 to do the matching has two parameters that can limit the  re-
+                 sources that it uses.

-                 The   --match-limit  option  provides  a  means  of  limiting
-                 resource usage when processing patterns that are not going to
+                 The  --match-limit  option  provides  a means of limiting re-
+                 source usage when processing patterns that are not  going  to
                 match, but which have a very large number of possibilities in
                 their search trees. The classic example  is  a  pattern  that
                 uses  nested unlimited repeats. Internally, PCRE uses a func-
-                 tion called match()  which  it  calls  repeatedly  (sometimes
-                 recursively).  The  limit  set by --match-limit is imposed on
-                 the number of times this function is called during  a  match,
-                 which  has  the effect of limiting the amount of backtracking
-                 that can take place.
+                 tion called match() which it calls repeatedly (sometimes  re-
+                 cursively).  The limit set by --match-limit is imposed on the
+                 number of times this function is called during a match, which
+                 has  the  effect  of limiting the amount of backtracking that
+                 can take place.

                 The --recursion-limit option is similar to --match-limit, but
                 instead of limiting the total number of times that match() is
                 called, it limits the depth of recursive calls, which in turn
                 limits  the  amount of memory that can be used. The recursion
-                 depth is a smaller number than the  total  number  of  calls,
-                 because not all calls to match() are recursive. This limit is
+                 depth is a smaller number than the total number of calls, be-
+                 cause  not  all calls to match() are recursive. This limit is
                 of use only if it is set smaller than --match-limit.

                 There are no short forms for these options. The default  set-
@@ -494,30 +494,30 @@ OPTIONS
                 is read line by line (see --line-buffered.)

       -N newline-type, --newline=newline-type
-                 The  PCRE  library  supports  five  different conventions for
-                 indicating the ends of lines. They are  the  single-character
-                 sequences  CR  (carriage  return) and LF (linefeed), the two-
-                 character sequence CRLF, an "anycrlf" convention, which  rec-
-                 ognizes  any  of the preceding three types, and an "any" con-
-                 vention, in which any Unicode line ending sequence is assumed
-                 to  end a line. The Unicode sequences are the three just men-
+                 The  PCRE library supports five different conventions for in-
+                 dicating the ends of lines. They are the single-character se-
+                 quences CR (carriage return) and LF (linefeed), the two-char-
+                 acter sequence CRLF, an "anycrlf"  convention,  which  recog-
+                 nizes  any of the preceding three types, and an "any" conven-
+                 tion, in which any Unicode line ending sequence is assumed to
+                 end  a  line.  The  Unicode sequences are the three just men-
                 tioned, plus  VT  (vertical  tab,  U+000B),  FF  (form  feed,
                 U+000C),   NEL  (next  line,  U+0085),  LS  (line  separator,
                 U+2028), and PS (paragraph separator, U+2029).

-                 When  the  PCRE  library  is  built,  a  default  line-ending
-                 sequence   is  specified.   This  is  normally  the  standard
-                 sequence for the operating system. Unless otherwise specified
-                 by  this  option,  pcregrep  uses the library's default.  The
-                 possible values for this option are CR, LF, CRLF, ANYCRLF, or
-                 ANY.  This  makes  it  possible to use pcregrep to scan files
-                 that have come from other environments without having to mod-
-                 ify  their  line  endings.  If the data that is being scanned
-                 does not agree with the convention set by this option,  pcre-
-                 grep  may  behave in strange ways. Note that this option does
-                 not apply to files specified by the  -f,  --exclude-from,  or
-                 --include-from options, which are expected to use the operat-
-                 ing system's standard newline sequence.
+                 When the PCRE library is built,  a  default  line-ending  se-
+                 quence  is specified.  This is normally the standard sequence
+                 for the operating system. Unless otherwise specified by  this
+                 option,  pcregrep  uses  the library's default.  The possible
+                 values for this option are CR, LF,  CRLF,  ANYCRLF,  or  ANY.
+                 This  makes  it  possible  to use pcregrep to scan files that
+                 have come from other environments without  having  to  modify
+                 their  line  endings.  If the data that is being scanned does
+                 not agree with the convention set by  this  option,  pcregrep
+                 may  behave  in  strange ways. Note that this option does not
+                 apply to files specified by the -f, --exclude-from, or  --in-
+                 clude-from  options,  which are expected to use the operating
+                 system's standard newline sequence.

       -n, --line-number
                 Precede each output line by its line number in the file, fol-
@@ -538,12 +538,12 @@ OPTIONS
                 is, the -A, -B, and -C options are ignored. If there is  more
                 than  one  match in a line, each of them is shown separately.
                 If -o is combined with -v (invert the sense of the  match  to
-                 find  non-matching  lines),  no  output is generated, but the
-                 return code is set appropriately. If the matched  portion  of
-                 the  line is empty, nothing is output unless the file name or
-                 line number are being printed, in which case they  are  shown
-                 on an otherwise empty line. This option is mutually exclusive
-                 with --file-offsets and --line-offsets.
+                 find non-matching lines), no output is generated, but the re-
+                 turn code is set appropriately. If the matched portion of the
+                 line is empty, nothing is output unless the file name or line
+                 number are being printed, in which case they are shown on  an
+                 otherwise  empty line. This option is mutually exclusive with
+                 --file-offsets and --line-offsets.

       -onumber, --only-matching=number
                 Show only the part of the line  that  matched  the  capturing
@@ -579,8 +579,8 @@ OPTIONS
                 it  contains, taking note of any --include and --exclude set-
                 tings. By default, a directory is read as a normal  file;  in
                 some  operating  systems this gives an immediate end-of-file.
-                 This option is a shorthand  for  setting  the  -d  option  to
-                 "recurse".
+                 This option is a shorthand for setting the -d option to  "re-
+                 curse".

       --recursion-limit=number
                 See --match-limit above.
@@ -626,10 +626,10 @@ OPTIONS

 ENVIRONMENT VARIABLES

-       The  environment  variables  LC_ALL  and LC_CTYPE are examined, in that
-       order, for a locale. The first one that is set is  used.  This  can  be
-       overridden  by  the  --locale  option.  If  no  locale is set, the PCRE
-       library's default (usually the "C" locale) is used.
+       The environment variables LC_ALL and LC_CTYPE are examined, in that or-
+       der, for a locale. The first one that is set is used. This can be over-
+       ridden  by the --locale option. If no locale is set, the PCRE library's
+       default (usually the "C" locale) is used.


 NEWLINES
@@ -640,8 +640,8 @@ NEWLINES
       ever  newline sequences they have in the input. However, the setting of
       this option does not affect the interpretation of  files  specified  by
       the -f, --exclude-from, or --include-from options, which are assumed to
-       use the operating system's  standard  newline  sequence,  nor  does  it
-       affect  the  way in which pcregrep writes informational messages to the
+       use the operating system's standard newline sequence, nor does  it  af-
+       fect  the  way  in  which pcregrep writes informational messages to the
       standard error and output streams. For these it uses the string "\n" to
       indicate  newlines,  relying on the C I/O library to convert this to an
       appropriate sequence.
@@ -687,13 +687,13 @@ OPTIONS WITH DATA
         --file /some/file

       Note, however, that if you want to supply a file name beginning with  ~
-       as  data  in  a  shell  command,  and have the shell expand ~ to a home
-       directory, you must separate the file name from the option, because the
+       as  data  in a shell command, and have the shell expand ~ to a home di-
+       rectory, you must separate the file name from the option,  because  the
       shell does not treat ~ specially unless it is at the start of an item.

       The  exceptions  to the above are the --colour (or --color) and --only-
-       matching options, for which the data  is  optional.  If  one  of  these
-       options  does  have  data, it must be given in the first form, using an
+       matching options, for which the data is optional. If one of  these  op-
+       tions  does  have  data,  it  must be given in the first form, using an
       equals character. Otherwise pcregrep will assume that it has no data.


@@ -702,14 +702,14 @@ MATCHING ERRORS
       It is possible to supply a regular expression that takes  a  very  long
       time  to  fail  to  match certain lines. Such patterns normally involve
       nested indefinite repeats, for example: (a+)*\d when matched against  a
-       line  of  a's  with  no  final  digit. The PCRE matching function has a
-       resource limit that causes it to abort in these circumstances. If  this
+       line  of  a's with no final digit. The PCRE matching function has a re-
+       source limit that causes it to abort in these  circumstances.  If  this
       happens, pcregrep outputs an error message and the line that caused the
       problem to the standard error stream. If there are more  than  20  such
       errors, pcregrep gives up.

-       The  --match-limit  option  of  pcregrep can be used to set the overall
-       resource limit; there is a second option called --recursion-limit  that
+       The --match-limit option of pcregrep can be used to set the overall re-
+       source limit; there is a second option  called  --recursion-limit  that
       sets  a limit on the amount of memory (usually stack) that is used (see
       the discussion of these options above).


--- a/pcre/doc/pcretest.1
+++ b/pcre/doc/pcretest.1
-.TH PCRETEST 1 "23 February 2017" "PCRE 8.41"
+.TH PCRETEST 1 "10 February 2020" "PCRE 8.44"
 .SH NAME
 pcretest - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@@ -78,21 +78,19 @@ the 16-bit library, or \fBpcre32_xx\fP when using the 32-bit library".
 .rs
 .TP 10
 \fB-8\fP
-If both the 8-bit library has been built, this option causes the 8-bit library
-to be used (which is the default); if the 8-bit library has not been built,
-this option causes an error.
+If the 8-bit library has been built, this option causes it to be used (this is
+the default). If the 8-bit library has not been built, this option causes an
+error.
 .TP 10
 \fB-16\fP
-If both the 8-bit or the 32-bit, and the 16-bit libraries have been built, this
-option causes the 16-bit library to be used. If only the 16-bit library has been
-built, this is the default (so has no effect). If only the 8-bit or the 32-bit
-library has been built, this option causes an error.
+If the 16-bit library has been built, this option causes it to be used. If only
+the 16-bit library has been built, this is the default. If the 16-bit library
+has not been built, this option causes an error.
 .TP 10
 \fB-32\fP
-If both the 8-bit or the 16-bit, and the 32-bit libraries have been built, this
-option causes the 32-bit library to be used. If only the 32-bit library has been
-built, this is the default (so has no effect). If only the 8-bit or the 16-bit
-library has been built, this option causes an error.
+If the 32-bit library has been built, this option causes it to be used. If only
+the 32-bit library has been built, this is the default. If the 32-bit library
+has not been built, this option causes an error.
 .TP 10
 \fB-b\fP
 Behave as if each pattern has the \fB/B\fP (show byte code) modifier; the
@@ -1155,6 +1153,6 @@ Cambridge CB2 3QH, England.
 .rs
 .sp
 .nf
-Last updated: 23 February 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 10 February 2020
+Copyright (c) 1997-2020 University of Cambridge.
 .fi
--- a/pcre/doc/pcretest.txt
+++ b/pcre/doc/pcretest.txt
@@ -47,15 +47,15 @@ INPUT DATA FORMAT
 PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES

       From release 8.30, two separate PCRE libraries can be built. The origi-
-       nal  one  supports  8-bit  character  strings, whereas the newer 16-bit
-       library supports  character  strings  encoded  in  16-bit  units.  From
-       release  8.32,  a  third  library  can  be  built, supporting character
-       strings encoded in 32-bit units. The pcretest program can  be  used  to
-       test all three libraries. However, it is itself still an 8-bit program,
-       reading 8-bit input and writing 8-bit output.  When testing the  16-bit
-       or  32-bit  library, the patterns and data strings are converted to 16-
-       or 32-bit format before being passed to  the  PCRE  library  functions.
-       Results are converted to 8-bit for output.
+       nal  one supports 8-bit character strings, whereas the newer 16-bit li-
+       brary supports character strings encoded in 16-bit units. From  release
+       8.32,  a  third  library can be built, supporting character strings en-
+       coded in 32-bit units. The pcretest program can be  used  to  test  all
+       three  libraries. However, it is itself still an 8-bit program, reading
+       8-bit input and writing 8-bit  output.   When  testing  the  16-bit  or
+       32-bit  library,  the patterns and data strings are converted to 16- or
+       32-bit format before being passed to the PCRE  library  functions.  Re-
+       sults are converted to 8-bit for output.

       References to functions and structures of the form pcre[16|32]_xx below
       mean "pcre_xx when using the 8-bit library, pcre16_xx  when  using  the
@@ -64,30 +64,27 @@ PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES

 COMMAND LINE OPTIONS

-       -8        If  both the 8-bit library has been built, this option causes
-                 the 8-bit library to be used (which is the default);  if  the
-                 8-bit  library  has  not  been  built,  this option causes an
-                 error.
+       -8        If the 8-bit library has been built, this option causes it to
+                 be used (this is the default). If the 8-bit library  has  not
+                 been built, this option causes an error.

-       -16       If both the 8-bit or the 32-bit,  and  the  16-bit  libraries
-                 have  been built, this option causes the 16-bit library to be
-                 used. If only the 16-bit library has been built, this is  the
-                 default  (so  has no effect). If only the 8-bit or the 32-bit
-                 library has been built, this option causes an error.
+       -16       If  the  16-bit library has been built, this option causes it
+                 to be used. If only the 16-bit library has been  built,  this
+                 is  the  default.  If  the 16-bit library has not been built,
+                 this option causes an error.

-       -32       If both the 8-bit or the 16-bit,  and  the  32-bit  libraries
-                 have  been built, this option causes the 32-bit library to be
-                 used. If only the 32-bit library has been built, this is  the
-                 default  (so  has no effect). If only the 8-bit or the 16-bit
-                 library has been built, this option causes an error.
+       -32       If the 32-bit library has been built, this option  causes  it
+                 to  be  used. If only the 32-bit library has been built, this
+                 is the default. If the 32-bit library  has  not  been  built,
+                 this option causes an error.

       -b        Behave  as  if each pattern has the /B (show byte code) modi-
                 fier; the internal form is output after compilation.

       -C        Output the version number of the PCRE library, and all avail-
-                 able  information  about  the  optional  features  that   are
-                 included,  and  then  exit  with  zero  exit  code. All other
-                 options are ignored.
+                 able  information  about  the  optional features that are in-
+                 cluded, and then exit with zero exit code. All other  options
+                 are ignored.

       -C option Output  information  about a specific build-time option, then
                 exit. This functionality is intended for use in scripts  such
@@ -142,8 +139,8 @@ COMMAND LINE OPTIONS
                 repeatedly with different limits.

       -m        Output  the  size  of each compiled pattern after it has been
-                 compiled.  This  is  equivalent  to adding /M to each regular
-                 expression. The size is given in bytes for both libraries.
+                 compiled. This is equivalent to adding /M to each regular ex-
+                 pression. The size is given in bytes for both libraries.

       -O        Behave  as  if each pattern has the /O modifier, that is dis-
                 able auto-possessification for all patterns.
@@ -188,22 +185,22 @@ COMMAND LINE OPTIONS
                 after a match or no match when JIT-compiled code was actually
                 used.

-                 Note that there are pattern options  that  can  override  -s,
-                 either specifying no studying at all, or suppressing JIT com-
-                 pilation.
+                 Note that there are pattern options that can override -s, ei-
+                 ther specifying no studying at all, or suppressing JIT compi-
+                 lation.

                 If  the  /I  or /D option is present on a pattern (requesting
-                 output  about  the  compiled  pattern), information about the
-                 result of studying is not included when  studying  is  caused
-                 only  by  -s  and neither -i nor -d is present on the command
-                 line. This behaviour means that the output  from  tests  that
-                 are  run with and without -s should be identical, except when
-                 options that output information about the actual running of a
-                 match are set.
-
-                 The  -M,  -t,  and  -tm options, which give information about
-                 resources used, are likely to produce different  output  with
-                 and  without  -s.  Output may also differ if the /C option is
+                 output about the compiled pattern), information about the re-
+                 sult of studying is not included when studying is caused only
+                 by -s and neither -i nor -d is present on the  command  line.
+                 This  behaviour means that the output from tests that are run
+                 with and without -s should be identical, except when  options
+                 that  output  information about the actual running of a match
+                 are set.
+
+                 The -M, -t, and -tm options, which give information about re-
+                 sources used, are likely to produce different output with and
+                 without -s. Output may  also  differ  if  the  /C  option  is
                 present on an individual pattern. This uses callouts to trace
                  the  matching  process,  and  this may be  different  between
                 studied  and  non-studied  patterns.  If the pattern contains
@@ -397,9 +394,9 @@ PATTERN MODIFIERS
         /^abc/m<CRLF>

       As well as turning on  the  PCRE_UTF8/16/32  option,  the  /8  modifier
-       causes all non-printing characters in  output  strings  to  be  printed
-       using the \x{hh...} notation. Otherwise, those less than 0x100 are out-
-       put in hex without the curly brackets.
+       causes  all non-printing characters in output strings to be printed us-
+       ing the \x{hh...} notation. Otherwise, those less than 0x100 are output
+       in hex without the curly brackets.

       Full  details  of  the PCRE options are given in the pcreapi documenta-
       tion.
@@ -410,11 +407,11 @@ PATTERN MODIFIERS
       requested  by  the  /g  or  /G modifier. After finding a match, PCRE is
       called again to search the remainder of the subject string. The differ-
       ence between /g and /G is that the former uses the startoffset argument
-       to  pcre[16|32]_exec()  to  start  searching  at a new point within the
-       entire string (which is in effect what Perl does), whereas  the  latter
+       to pcre[16|32]_exec() to start searching at a new point within the  en-
+       tire  string  (which  is  in effect what Perl does), whereas the latter
       passes over a shortened substring.  This  makes  a  difference  to  the
-       matching process if the pattern  begins  with  a  lookbehind  assertion
-       (including \b or \B).
+       matching process if the pattern begins with a lookbehind assertion (in-
+       cluding \b or \B).

       If any call to pcre[16|32]_exec() in a /g or  /G  sequence  matches  an
       empty  string, the next call is done with the PCRE_NOTEMPTY_ATSTART and
@@ -431,11 +428,11 @@ PATTERN MODIFIERS
       There are yet more modifiers for controlling the way pcretest operates.

       The /+ modifier requests that as well as outputting the substring  that
-       matched the entire pattern, pcretest  should  in  addition  output  the
-       remainder  of  the  subject  string. This is useful for tests where the
-       subject contains multiple copies of the same substring. If the +  modi-
-       fier  appears  twice, the same action is taken for captured substrings.
-       In each case the remainder is output on the following line with a  plus
+       matched  the entire pattern, pcretest should in addition output the re-
+       mainder of the subject string. This is useful for tests where the  sub-
+       ject  contains multiple copies of the same substring. If the + modifier
+       appears twice, the same action is taken  for  captured  substrings.  In
+       each  case  the  remainder  is output on the following line with a plus
       character following the capture number. Note that  this  modifier  must
       not  immediately follow the /S modifier because /S+ and /S++ have other
       meanings.
@@ -471,13 +468,13 @@ PATTERN MODIFIERS
       and so on). It does this by calling pcre[16|32]_fullinfo()  after  com-
       piling  a  pattern.  If the pattern is studied, the results of that are
       also output. In this output, the word "char" means a non-UTF character,
-       that is, the value of a single data item  (8-bit,  16-bit,  or  32-bit,
-       depending on the library that is being tested).
+       that is, the value of a single data item (8-bit, 16-bit, or 32-bit, de-
+       pending on the library that is being tested).

       The /K modifier requests pcretest to show names from backtracking  con-
       trol  verbs  that  are  returned  from  calls to pcre[16|32]_exec(). It
-       causes  pcretest  to  create  a  pcre[16|32]_extra block if one has not
-       already been created by a call to pcre[16|32]_study(), and to  set  the
+       causes pcretest to create a pcre[16|32]_extra block if one has not  al-
+       ready  been  created  by  a call to pcre[16|32]_study(), and to set the
       PCRE_EXTRA_MARK flag and the mark field  within  it,  every  time  that
       pcre[16|32]_exec()  is  called.  If  the  variable  that the mark field
       points to is  non-NULL  for  a  match,  non-match,  or  partial  match,
@@ -494,8 +491,8 @@ PATTERN MODIFIERS
       pcre[16|32]_maketables()  is  called to build a set of character tables
       for the locale, and this is then passed to  pcre[16|32]_compile()  when
       compiling  the regular expression. Without an /L (or /T) modifier, NULL
-       is  passed  as  the  tables  pointer;  that  is, /L applies only to the
-       expression on which it appears.
+       is passed as the tables pointer; that is, /L applies only  to  the  ex-
+       pression on which it appears.

       The  /M  modifier  causes the size in bytes of the memory block used to
       hold the compiled pattern to be output. This does not include the  size
@@ -508,9 +505,9 @@ PATTERN MODIFIERS
       external  function  that  is passed to PCRE and used for stack checking
       during compilation (see the pcreapi documentation for details).

-       The  /S  modifier  causes  pcre[16|32]_study()  to  be called after the
-       expression has been compiled, and the results used when the  expression
-       is matched. There are a number of qualifying characters that may follow
+       The /S modifier causes pcre[16|32]_study() to be called after  the  ex-
+       pression has been compiled, and the results used when the expression is
+       matched. There are a number of qualifying characters  that  may  follow
       /S.  They may appear in any order.

       If /S is followed by an exclamation mark, pcre[16|32]_study() is called
@@ -582,8 +579,8 @@ PATTERN MODIFIERS
         /W    REG_UCP        )   the POSIX standard
         /8    REG_UTF8       )

-       The /+ modifier works as  described  above.  All  other  modifiers  are
-       ignored.
+       The  /+  modifier works as described above. All other modifiers are ig-
+       nored.

   Locking out certain modifiers

@@ -611,10 +608,10 @@ PATTERN MODIFIERS
         < forbid <JS><cr>

       There must be a single space between < and "forbid" for this feature to
-       be  recognised.  If  there  is not, the line is interpreted either as a
-       request to re-load a pre-compiled pattern (see  "SAVING  AND  RELOADING
-       COMPILED  PATTERNS"  below) or, if there is a another < character, as a
-       pattern that uses < as its delimiter.
+       be recognised. If there is not, the line is interpreted either as a re-
+       quest to re-load a pre-compiled pattern (see "SAVING AND RELOADING COM-
+       PILED PATTERNS" below) or, if there is  another  < character, as a pat-
+       tern that uses < as its delimiter.


 DATA LINES
@@ -726,8 +723,8 @@ DATA LINES
       makes  it  possible  to  construct invalid UTF-32 sequences for testing
       purposes.

-       The  escapes  that  specify  line ending sequences are literal strings,
-       exactly as shown. No more than one newline setting should be present in
+       The escapes that specify line ending sequences are literal strings, ex-
+       actly  as  shown. No more than one newline setting should be present in
       any data line.

       A backslash followed by anything else just escapes the  anything  else.
@@ -744,8 +741,8 @@ DATA LINES
       different values in the match_limit and match_limit_recursion fields of
       the pcre[16|32]_extra data structure, until it finds the  minimum  num-
       bers for each parameter that allow pcre[16|32]_exec() to complete with-
-       out  error.  Because  this  is testing a specific feature of the normal
-       interpretive pcre[16|32]_exec() execution, the use of any JIT optimiza-
+       out error. Because this is testing a specific feature of the normal in-
+       terpretive  pcre[16|32]_exec()  execution, the use of any JIT optimiza-
        tion that might have been set up by the /S+ qualifier or -s+ option  is
       disabled.

@@ -755,27 +752,27 @@ DATA LINES
       numbers of matching possibilities, it can  become  large  very  quickly
       with  increasing  length  of  subject string. The match_limit_recursion
       number is a measure of how much stack (or, if  PCRE  is  compiled  with
-       NO_RECURSE, how much heap) memory  is  needed  to  complete  the  match
-       attempt.
+       NO_RECURSE,  how  much heap) memory is needed to complete the match at-
+       tempt.

       When \O is used, the value specified may be higher or  lower  than  the
       size set by the -O command line option (or defaulted to 45); \O applies
-       only  to  the  call  of  pcre[16|32]_exec()  for  the  line in which it
-       appears.
+       only to the call of pcre[16|32]_exec() for the line  in  which  it  ap-
+       pears.

       If  the /P modifier was present on the pattern, causing the POSIX wrap-
-       per  API  to  be  used, the only option-setting sequences that have any
-       effect are \B,  \N,  and  \Z,  causing  REG_NOTBOL,  REG_NOTEMPTY,  and
-       REG_NOTEOL, respectively, to be passed to regexec().
+       per API to be used, the only option-setting sequences that have any ef-
+       fect  are \B, \N, and \Z, causing REG_NOTBOL, REG_NOTEMPTY, and REG_NO-
+       TEOL, respectively, to be passed to regexec().


 THE ALTERNATIVE MATCHING FUNCTION

       By  default,  pcretest  uses  the  standard  PCRE  matching   function,
-       pcre[16|32]_exec() to match each  data  line.  PCRE  also  supports  an
-       alternative  matching  function, pcre[16|32]_dfa_test(), which operates
-       in a different way, and has some restrictions. The differences  between
-       the two functions are described in the pcrematching documentation.
+       pcre[16|32]_exec()  to  match each data line. PCRE also supports an al-
+       ternative matching function, pcre[16|32]_dfa_exec(), which operates  in
+       a different way, and has some restrictions. The differences between the
+       two functions are described in the pcrematching documentation.

       If a data line contains the \D escape sequence, or if the command  line
       contains  the  -dfa  option, the alternative matching function is used.
@@ -793,16 +790,15 @@ DEFAULT OUTPUT FROM PCRETEST
       that pcre[16|32]_exec() returns, starting with number 0 for the  string
       that  matched  the whole pattern. Otherwise, it outputs "No match" when
       the return is PCRE_ERROR_NOMATCH, and "Partial match:" followed by  the
-       partially   matching   substring   when   pcre[16|32]_exec()    returns
-       PCRE_ERROR_PARTIAL.  (Note  that  this is the entire substring that was
-       inspected during the partial match; it may  include  characters  before
-       the  actual  match  start  if a lookbehind assertion, \K, \b, or \B was
-       involved.) For any other return, pcretest  outputs  the  PCRE  negative
-       error  number  and a short descriptive phrase. If the error is a failed
-       UTF string check, the offset of the start of the failing character  and
-       the  reason  code are also output, provided that the size of the output
-       vector is at least two. Here is an example of an  interactive  pcretest
-       run.
+       partially  matching  substring when pcre[16|32]_exec() returns PCRE_ER-
+       ROR_PARTIAL. (Note that this is the entire substring that was inspected
+       during  the  partial match; it may include characters before the actual
+       match start if a lookbehind assertion, \K, \b, or \B was involved.) For
+       any other return, pcretest outputs the PCRE negative error number and a
+       short descriptive phrase. If the error is a failed  UTF  string  check,
+       the  offset  of  the start of the failing character and the reason code
+       are also output, provided that the size of  the  output  vector  is  at
+       least two. Here is an example of an interactive pcretest run.

         $ pcretest
         PCRE version 8.13 2011-04-30
@@ -892,9 +888,9 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
       (Using the normal matching function on this data  finds  only  "tang".)
       The  longest matching string is always given first (and numbered zero).
       After a PCRE_ERROR_PARTIAL return, the output is "Partial match:", fol-
-       lowed  by  the  partially  matching  substring.  (Note that this is the
-       entire substring that was inspected during the partial  match;  it  may
-       include characters before the actual match start if a lookbehind asser-
+       lowed  by  the partially matching substring. (Note that this is the en-
+       tire substring that was inspected during the partial match; it may  in-
+       clude  characters  before the actual match start if a lookbehind asser-
       tion, \K, \b, or \B was involved.)

       If /g is present on the pattern, the search for further matches resumes
@@ -909,9 +905,9 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
          1: tan
          0: tan

-       Since  the  matching  function  does not support substring capture, the
-       escape sequences that are concerned with captured  substrings  are  not
-       relevant.
+       Since the matching function does not support substring capture, the es-
+       cape sequences that are concerned with captured substrings are not rel-
+       evant.


 RESTARTING AFTER A PARTIAL MATCH
@@ -942,9 +938,9 @@ CALLOUTS
         --->pqrabcdef
           0    ^  ^     \d

-       This  output  indicates  that  callout  number  0  occurred for a match
-       attempt starting at the fourth character of the  subject  string,  when
-       the pointer was at the seventh character of the data, and when the next
+       This  output  indicates  that callout number 0 occurred for a match at-
+       tempt starting at the fourth character of the subject string, when  the
+       pointer  was  at  the  seventh character of the data, and when the next
       pattern item was \d. Just one circumflex is output  if  the  start  and
       current positions are the same.

@@ -963,8 +959,8 @@ CALLOUTS
          0: E*

       If a pattern contains (*MARK) items, an additional line is output when-
-       ever  a  change  of  latest mark is passed to the callout function. For
-       example:
+       ever a change of latest mark is passed to the callout function. For ex-
+       ample:

           re> /a(*MARK:X)bc/C
         data> abc
@@ -999,8 +995,8 @@ NON-PRINTING CHARACTERS

       When  pcretest  is  outputting text that is a matched part of a subject
       string, it behaves in the same way, unless a different locale has  been
-       set  for  the  pattern  (using  the  /L  modifier).  In  this case, the
-       isprint() function to distinguish printing and non-printing characters.
+       set for the pattern (using the /L modifier). In this case, the isprint()
+       function is used to distinguish printing and non-printing characters.


 SAVING AND RELOADING COMPILED PATTERNS
@@ -1020,14 +1016,14 @@ SAVING AND RELOADING COMPILED PATTERNS
       studied with JIT optimization, the JIT data cannot be saved.

       The data that is written is binary.  The  first  eight  bytes  are  the
-       length  of  the  compiled  pattern  data  followed by the length of the
-       optional study data, each written as four  bytes  in  big-endian  order
-       (most  significant  byte  first). If there is no study data (either the
-       pattern was not studied, or studying did not return any data), the sec-
-       ond  length  is  zero. The lengths are followed by an exact copy of the
-       compiled pattern. If there is additional study  data,  this  (excluding
-       any  JIT  data)  follows  immediately after the compiled pattern. After
-       writing the file, pcretest expects to read a new pattern.
+       length  of  the compiled pattern data followed by the length of the op-
+       tional study data, each written as four bytes in big-endian order (most
+       significant  byte first). If there is no study data (either the pattern
+       was not studied, or studying did  not  return  any  data),  the  second
+       length  is  zero. The lengths are followed by an exact copy of the com-
+       piled pattern. If there is additional study data, this  (excluding  any
+       JIT data) follows immediately after the compiled pattern. After writing
+       the file, pcretest expects to read a new pattern.

       A saved pattern can be reloaded into pcretest by  specifying  <  and  a
       file  name  instead  of a pattern. There must be no space between < and
@@ -1066,10 +1062,10 @@ SAVING AND RELOADING COMPILED PATTERNS
       ing and experimentation. It is not intended for production use  because
       only  a  single pattern can be written to a file. Furthermore, there is
       no facility for supplying  custom  character  tables  for  use  with  a
-       reloaded  pattern.  If  the  original  pattern was compiled with custom
-       tables, an attempt to match a subject string using a  reloaded  pattern
-       is  likely to cause pcretest to crash.  Finally, if you attempt to load
-       a file that is not in the correct format, the result is undefined.
+       reloaded  pattern. If the original pattern was compiled with custom ta-
+       bles, an attempt to match a subject string using a reloaded pattern  is
+       likely  to  cause pcretest to crash.  Finally, if you attempt to load a
+       file that is not in the correct format, the result is undefined.


 SEE ALSO
@@ -1087,5 +1083,5 @@ AUTHOR

 REVISION

-       Last updated: 23 February 2017
-       Copyright (c) 1997-2017 University of Cambridge.
+       Last updated: 10 February 2020
+       Copyright (c) 1997-2020 University of Cambridge.
--- a/pcre/pcre_compile.c
+++ b/pcre/pcre_compile.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2018 University of Cambridge
+           Copyright (c) 1997-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -68,7 +68,7 @@ COMPILE_PCREx macro will already be appropriately set. */

 /* Macro for setting individual bits in class bitmaps. */

-#define SETBIT(a,b) a[(b)/8] |= (1 << ((b)&7))
+#define SETBIT(a,b) a[(b)/8] |= (1U << ((b)&7))

 /* Maximum length value to check against when making sure that the integer that
 holds the compiled pattern length does not overflow. We make it a bit less than
@@ -129,8 +129,8 @@ overrun before it actually does run off the end of the data block. */

 /* Private flags added to firstchar and reqchar. */

-#define REQ_CASELESS    (1 << 0)        /* Indicates caselessness */
-#define REQ_VARY        (1 << 1)        /* Reqchar followed non-literal item */
+#define REQ_CASELESS    (1U << 0)        /* Indicates caselessness */
+#define REQ_VARY        (1U << 1)        /* Reqchar followed non-literal item */
 /* Negative values for the firstchar and reqchar flags */
 #define REQ_UNSET       (-2)
 #define REQ_NONE        (-1)
@@ -3612,7 +3612,7 @@ for(;;)
      if (chr > 255) break;
      class_bitset = (pcre_uint8 *)
        ((list_ptr == list ? code : base_end) - list_ptr[2]);
-      if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
+      if ((class_bitset[chr >> 3] & (1U << (chr & 7))) != 0) return FALSE;
      break;

 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
@@ -7133,15 +7133,17 @@ for (;; ptr++)
          int n = 0;
          ptr++;
          while(IS_DIGIT(*ptr))
+            {
            n = n * 10 + *ptr++ - CHAR_0;
-          if (*ptr != CHAR_RIGHT_PARENTHESIS)
+            if (n > 255)
              {
-            *errorcodeptr = ERR39;
+              *errorcodeptr = ERR38;
              goto FAILED;
              }
-          if (n > 255)
+            }
+          if (*ptr != CHAR_RIGHT_PARENTHESIS)
            {
-            *errorcodeptr = ERR38;
+            *errorcodeptr = ERR39;
            goto FAILED;
            }
          *code++ = n;
@@ -7459,7 +7461,7 @@ for (;; ptr++)
              {
              open_capitem *oc;
              recno = GET2(slot, 0);
-              cd->backref_map |= (recno < 32)? (1 << recno) : 1;
+              cd->backref_map |= (recno < 32)? (1U << recno) : 1;
              if (recno > cd->top_backref) cd->top_backref = recno;

               /* Check to see if this back reference is recursive, that is, it
@@ -8070,7 +8072,7 @@ for (;; ptr++)
        item_hwm_offset = cd->hwm - cd->start_workspace;
        *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
        PUT2INC(code, 0, recno);
-        cd->backref_map |= (recno < 32)? (1 << recno) : 1;
+        cd->backref_map |= (recno < 32)? (1U << recno) : 1;
        if (recno > cd->top_backref) cd->top_backref = recno;

         /* Check to see if this back reference is recursive, that is, it
@@ -8683,7 +8685,7 @@ do {
            op == OP_SCBRA || op == OP_SCBRAPOS)
     {
     int n = GET2(scode, 1+LINK_SIZE);
-     int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
+     int new_map = bracket_map | ((n < 32)? (1U << n) : 1);
     if (!is_anchored(scode, new_map, cd, atomcount)) return FALSE;
     }

@@ -8811,7 +8813,7 @@ do {
            op == OP_SCBRA || op == OP_SCBRAPOS)
     {
     int n = GET2(scode, 1+LINK_SIZE);
-     int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
+     int new_map = bracket_map | ((n < 32)? (1U << n) : 1);
     if (!is_startline(scode, new_map, cd, atomcount, inassert)) return FALSE;
     }


--- a/pcre/pcre_jit_compile.c
+++ b/pcre/pcre_jit_compile.c
@@ -3938,10 +3938,10 @@ static sljit_s32 character_to_int32(pcre_uchar chr)
 sljit_s32 value = (sljit_s32)chr;
 #if defined COMPILE_PCRE8
 #define SSE2_COMPARE_TYPE_INDEX 0
-return (value << 24) | (value << 16) | (value << 8) | value;
+return ((unsigned int)value << 24) | ((unsigned int)value << 16) | ((unsigned int)value << 8) | (unsigned int)value;
 #elif defined COMPILE_PCRE16
 #define SSE2_COMPARE_TYPE_INDEX 1
-return (value << 16) | value;
+return ((unsigned int)value << 16) | value;
 #elif defined COMPILE_PCRE32
 #define SSE2_COMPARE_TYPE_INDEX 2
 return value;
@@ -8507,7 +8507,7 @@ if (opcode == OP_ONCE)
  /* We temporarily encode the needs_control_head in the lowest bit.
     Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
     the same value for small signed numbers (including negative numbers). */
-  BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
+  BACKTRACK_AS(bracket_backtrack)->u.framesize = ((unsigned int)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
  }
 return cc + repeat_length;
 }

--- a/pcre/pcrecpp.cc
+++ b/pcre/pcrecpp.cc
@@ -66,7 +66,8 @@ Arg RE::no_arg((void*)NULL);
 // inclusive test if we ever needed it.  (Note that not only the
 // __attribute__ syntax, but also __USER_LABEL_PREFIX__, are
 // gnu-specific.)
-#if defined(__GNUC__) && __GNUC__ >= 3 && defined(__ELF__) && !defined(__INTEL_COMPILER)
+#if defined(__GNUC__) && __GNUC__ >= 3 && defined(__ELF__) \
+       && !defined(__INTEL_COMPILER) && !defined(__LCC__)
 # define ULP_AS_STRING(x)            ULP_AS_STRING_INTERNAL(x)
 # define ULP_AS_STRING_INTERNAL(x)   #x
 # define USER_LABEL_PREFIX_STR       ULP_AS_STRING(__USER_LABEL_PREFIX__)
@@ -91,6 +92,7 @@ static const char *start_options[] = {
  "(*LIMIT_RECURSION=",
  "(*LIMIT_MATCH=",
  "(*CRLF)",
+  "(*LF)",
  "(*CR)",
  "(*BSR_UNICODE)",
  "(*BSR_ANYCRLF)",

--- a/pcre/pcreposix.c
+++ b/pcre/pcreposix.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2018 University of Cambridge
+           Copyright (c) 1997-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -298,6 +298,7 @@ if (preg->re_pcre == NULL)
 (void)pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT,
  &re_nsub);
 preg->re_nsub = (size_t)re_nsub;
+preg->re_erroffset = (size_t)(-1);  /* No meaning after successful compile */
 return 0;
 }

@@ -335,8 +336,6 @@ if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
 if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
 if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE_NOTEMPTY;

-((regex_t *)preg)->re_erroffset = (size_t)(-1);  /* Only has meaning after compile */
-
 /* When no string data is being returned, or no vector has been passed in which
 to put it, ensure that nmatch is zero. Otherwise, ensure the vector for holding
 the return data is large enough. */

--- a/pcre/pcretest.c
+++ b/pcre/pcretest.c
@@ -500,7 +500,7 @@ enum {
 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
     defined (SUPPORT_PCRE32)) >= 2

-#define CHAR_SIZE (1 << pcre_mode)
+#define CHAR_SIZE (1U << pcre_mode)

 /* There doesn't seem to be an easy way of writing these macros that can cope
 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
@@ -4443,7 +4443,7 @@ while (!done)

          /* If there is study data, write it. */

-          if (extra != NULL)
+          if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
            {
            if (fwrite(extra->study_data, 1, true_study_size, f) <
                true_study_size)
@@ -4735,7 +4735,7 @@ while (!done)
        if (isdigit(*p))    /* Set copy string */
          {
          while(isdigit(*p)) n = n * 10 + *p++ - '0';
-          copystrings |= 1 << n;
+          copystrings |= 1U << n;
          }
        else if (isalnum(*p))
          {
@@ -4798,7 +4798,7 @@ while (!done)
        if (isdigit(*p))
          {
          while(isdigit(*p)) n = n * 10 + *p++ - '0';
-          getstrings |= 1 << n;
+          getstrings |= 1U << n;
          }
        else if (isalnum(*p))
          {
@@ -5335,7 +5335,7 @@ while (!done)

        for (i = 0; i < 32; i++)
          {
-          if ((copystrings & (1 << i)) != 0)
+          if ((copystrings & (1U << i)) != 0)
            {
            int rc;
            char copybuffer[256];
@@ -5400,7 +5400,7 @@ while (!done)

        for (i = 0; i < 32; i++)
          {
-          if ((getstrings & (1 << i)) != 0)
+          if ((getstrings & (1U << i)) != 0)
            {
            int rc;
            const char *substring;

--- a/pcre/testdata/testinput2
+++ b/pcre/testdata/testinput2
@@ -1380,7 +1380,7 @@
    1X
    123456\P

-//KF>testsavedregex
+//S-KF>testsavedregex

 /abc/IS>testsavedregex
 <testsavedregex

--- a/pcre/testdata/testoutput2
+++ b/pcre/testdata/testoutput2
@@ -5614,9 +5614,8 @@ No match
    123456\P
 No match

-//KF>testsavedregex
+//S-KF>testsavedregex
 Compiled pattern written to testsavedregex
-Study data written to testsavedregex

 /abc/IS>testsavedregex
 Capturing subpattern count = 0