
    Q?h%                     D   S r SrSSKJr  SSKJr  SSKJr  SSKJ	r	  / SQr
SR                  \
5      rS	S
/rSR                  \5      rS rS rSS jrS rS rSSKr\R*                  " S\R,                  5      rSS jr\S:X  a  SSKrSSKJr  \R:                  " \5        gg)aX  Helpers for text wrapping, hyphenation, Asian text splitting and kinsoku shori.

How to split a 'big word' depends on the language and the writing system.  This module
works on a Unicode string.  It ought to grow by allowing ore algoriths to be plugged
in based on possible knowledge of the language and desirable 'niceness' of the algorithm.

z3.3.0    )categorystringWidth)_FUZZ)	isUnicode)u&   !',.:;?!")]、。」』】〕］】）uK   々―ぁぃぅぇぉっゃゅょゎァィゥェォッャュョヮーヵヶu/   ゛゜・ヽヾゝゞ―‐°′″℃￠％‰ u&   ‘“（[{（〔［｛〈《「『【u   $£@#￥＄￡＠〒§c                     [        U 5      S:  $ )zIs this an Asian character? 0  )ord)chs    I/var/www/html/env/lib/python3.13/site-packages/reportlab/lib/textsplit.pyis_multi_byter   '   s    Gv    c                 F    U  Vs/ s H  n[        X1U5      PM     sn$ s  snf )ub  Returns a list of glyph widths.

>>> getCharWidths('Hello', 'Courier', 10)
[6.0, 6.0, 6.0, 6.0, 6.0]
>>> from reportlab.pdfbase.cidfonts import UnicodeCIDFont
>>> from reportlab.pdfbase.pdfmetrics import registerFont
>>> registerFont(UnicodeCIDFont('HeiseiMin-W3'))
>>> getCharWidths(u'東京', 'HeiseiMin-W3', 10)   #most kanji are 100 ems
[10.0, 10.0]
r   )wordfontNamefontSizeuChars       r   getCharWidthsr   +   s%     AEEuK2EEEs   c                     [        U 5      (       d  U R                  U5      nOU n[        XRU5      n[        XVU5      n[        U 5      (       d0  / nU H&  u  pUR	                  XR                  U5      /5        M(     UnU$ )a  Attempts to break a word which lacks spaces into two parts, the first of which
fits in the remaining space.  It is allowed to add hyphens or whatever it wishes.

This is intended as a wrapper for some language- and user-choice-specific splitting
algorithms.  It should only be called after line breaking on spaces, which covers western
languages and is highly optimised already.  It works on the 'last unsplit word'.

Presumably with further study one could write a Unicode splitting algorithm for text
fragments whick was much faster.

Courier characters should be 6 points wide.
>>> wordSplit('HelloWorld', 30, 'Courier', 10)
[[0.0, 'Hello'], [0.0, 'World']]
>>> wordSplit('HelloWorld', 31, 'Courier', 10)
[[1.0, 'Hello'], [1.0, 'World']]
)r   decoder   	dumbSplitappendencode)r   	maxWidthsr   r   encodinguword
charWidthslineslines2
extraSpacetexts              r   	wordSplitr#   :   su    " T??H%u9Je3ET??"'ZMM:{{8'<=> #(Lr   c                     Sn[        U[        [        45      (       d  U/n[        U 5      (       d   e/ nS=n=pgUS   n[	        U 5      n	XY:  a  X   n
X   nXj-  nUS-  nXh[
        -   :  a  US:  a  X-
  n[        U5      S:  ap  Xu-   S-	  n[        US-
  US5       HU  nX   n[        U5      S:X  d  [        U5      S:  d  M'  US-   nUU:  d  M4  US-   nU[        XU 5      -  nUU   n
U U   nUn  O   U[        ;  a  XWS-   :  a	  US-  nX-  nUR                  XXu R                  5       /5         U[	        U5         nUnSnXY:  a  M  US:  a  UR                  X-
  XS /5        U$ ! [         a    US   n N;f = f)a~  This function attempts to fit as many characters as possible into the available
space, cutting "like a knife" between characters.  This would do for Chinese.
It returns a list of (text, extraSpace) items where text is a Unicode string,
and extraSpace is the points of unused space available on the line.  This is a
structure which is fairly easy to display, and supports 'backtracking' approaches
after the fact.

Test cases assume each character is ten points wide...

>>> dumbSplit(u'Hello', [10]*5, 60)
[[10, u'Hello']]
>>> dumbSplit(u'Hello', [10]*5, 50)
[[0, u'Hello']]
>>> dumbSplit(u'Hello', [10]*5, 40)
[[0, u'Hell'], [30, u'o']]
uQ  
    #>>> dumbSplit(u'Hello', [10]*5, 4)   # less than one character
    #(u'', u'Hello')
    # this says 'Nihongo wa muzukashii desu ne!' (Japanese is difficult isn't it?) in 12 characters
    >>> jtext = u'日本語は難しいですね！'
    >>> dumbSplit(jtext, [10]*11, 30)   #
    (u'日本語', u'は難しいですね！')
    r      r
   ZsN)
isinstancelisttupler   lenr   r   ranger   sumALL_CANNOT_STARTr   strip
IndexError)r   widthsr   _morer   i	widthUsedlineStartPosmaxWidthnWwcr!   
limitCheckjcjks                    r   r   r   \   s   "E ie--I;yT???E#$$A$	|H	TB
$IG		Q%''IaK!-J1vf} +nq0
qs:b1AB|T)SWf_aCQ3 !!A&#fqk*::J &q	A $QA !A! 2* ((QA~-= Q
 LL*<&:&@&@&BCD)$SZ0 LIo $t 1}h*D,?@AL  )$R=)s   3E+ +E=<E=c                     / n[        U 5      [        U5      :X  d   eSn/ nSn X   nX   nXH-   U:  a  UR                  U5        XH-  nOU[        S   ;   a   M6  )a  Split according to Japanese rules according to CJKV (Lunde).

Essentially look for "nice splits" so that we don't end a line
with an open bracket, or start one with a full stop, or stuff like
that.  There is no attempt to try to split compound words into
constituent kanji.  It currently uses wrap-down: packs as much
on a line as possible, then backtracks if needed

This returns a number of words each of which should just about fit
on a line.  If you give it a whole paragraph at once, it will
do all the splits.

It's possible we might slightly step over the width limit
if we do hanging punctuation marks in future (e.g. dangle a Japanese
full stop in the right margin rather than using a whole character
box.

g        r   )r+   r   CANNOT_END_LINE)	r   r1   
availWidthr   curWidthcurLiner3   r   r8   s	            r   kinsokuShoriSplitrC      ss    ( Et9F###HG	A
WI<*$NN2MH _Q'' r   Nu   ([⺀-￿])c           
          [        U4S j[        R                  S[        X5      5      R	                  S5      5      R                  SS5      R                  U5      $ )Nc           	          U < / SQ[        U 5      U R                  S5      -
  S-
  [        UR                  SS5      S   5      -   U:  =(       d    U SS  S:H  =(       a    S   < U< 3$ )N) 
r   rG   r%   r   r&       )r+   rfindsplit)liner   widths      r   <lambda>cjkwrap.<locals>.<lambda>   sm    T4::d+;!;A!=TZZQ/24"58="> !.23i4'-A/ 4r   z\1\0 rF   rH   r   )reducerxsubstrrK   replacer   )r"   rM   r   s      r   cjkwraprU      sM    E  xT!34::3? gdBx 01r   __main__)	textsplit)utf8)__doc____version__unicodedatar   reportlab.pdfbase.pdfmetricsr   reportlab.rl_configr   reportlab.lib.utilsr   CANNOT_START_LINEjoinr.   r?   ALL_CANNOT_ENDr   r   r#   r   rC   recompileUNICODErQ   rU   __name__doctestreportlab.librW   testmod r   r   <module>rj      s   
    4 % )  88-.  R4	 /*F D\|"P 
::!2::.1 Z'OOI r   