defl/defl/_string_.py
2025-04-28 14:44:03 -04:00

235 lines
6.9 KiB
Python

import base64
import hashlib, difflib
import random
import re
import secrets
import shlex
import string
import subprocess
from functools import partial
from ._ansii_ import cl
from ._basic_ import listFromArgsStar
from ._obj_ import Obj
from ._typing_ import *
# NOTE(review): the name is spelled "sqaure" and the value is an empty string here;
# presumably it was meant to hold a UTF-8 square glyph — confirm against the original file.
utf8_sqaure = ''
def code64(enc=N, dec=N):
    """Base64-encode ``enc`` or base64-decode ``dec``.

    Both inputs and the result are text; UTF-8 is used for the byte
    conversion on either side. At least one argument must be supplied.
    When both are supplied only ``enc`` is processed.
    """
    assert not (enc is N and dec is N)
    # First supplied argument wins; encoding is checked before decoding.
    for raw, convert in ((enc, base64.b64encode), (dec, base64.b64decode)):
        if raw is not N:
            return convert(raw.encode('UTF-8')).decode('UTF-8')
def cottonCandyStr(*args, first: bool = T, shellQuote: bool = F, join: str = ' ') -> str:
    """Render the arguments as a single string with alternating colors.

    The arguments are normalized through ``listFromArgsStar`` and converted
    with ``str``. When ``first`` is truthy the first item is rendered white
    and underlined, followed by a single space; the remaining items are
    stripped, colored alternately magenta/cyan and joined with ``join``.

    Args:
        *args: Items to render.
        first: Treat the first item as a highlighted heading.
        shellQuote: Pass every item through ``shlex.quote`` before coloring.
        join: Separator placed between the non-heading items.

    Returns:
        The colored string; ``''`` when there is nothing to render.
    """
    words = [str(x) for x in listFromArgsStar(*args)]
    if shellQuote:
        words = [shlex.quote(x) for x in words]
    # Guard on ``words`` so an empty argument list yields '' instead of IndexError.
    if first and words:
        head, rest = words[0], words[1:]
    else:
        head, rest = N, words
    headStr = f'{cl.wht}{cl.underline}{head}{cl.r}' if head else ''
    other = join.join(
        f'{cl.mag if i % 2 == 0 else cl.cyn}{x.strip()}{cl.r}' for i, x in enumerate(rest)
    )
    return headStr + (' ' if other else '') + other
def dictToStr(
    d: dict, k='', v='', join: str = ' ', joinItem: str = '='
) -> str | list[str] | dict[str, str]:
    """Format a mapping as prefixed ``key<joinItem>value`` text.

    Every key is wrapped as ``{k}{key}{cl.r}`` and every value as
    ``{v}{value}{cl.r}``.

    Args:
        d: Mapping to format.
        k: Prefix written before each key.
        v: Prefix written before each value.
        join: Separator between rendered items; ``N`` skips the final join.
        joinItem: Separator between a key and its value; ``N`` keeps the
            items as a dict.

    Returns:
        A ``str`` when ``join`` is given, otherwise a ``list[str]`` of
        rendered items (or the wrapped ``dict`` when ``joinItem`` is ``N`` too).
    """
    pairs = {f'{k}{key}{cl.r}': f'{v}{val}{cl.r}' for key, val in d.items()}
    if joinItem is not N:
        pairs = [f'{key}{joinItem}{val}' for key, val in pairs.items()]
    if join is not N:
        # NOTE(review): with joinItem=N this joins the dict's keys only, so the
        # values are dropped from the result — confirm that is intended.
        pairs = join.join(pairs)
    return pairs
def colorDict(
    d: dict,
    k=cl.yel,
    v=cl.cyn,
    join: str = N,
    # Was the plain string '{cl.grn}=' (missing ``f``), which printed the
    # placeholder text literally instead of the green color code.
    joinItem: str = f'{cl.grn}=',
) -> str | list[str] | dict[str, str]:
    """Color-format a mapping by delegating to ``dictToStr``.

    Keys default to yellow, values to cyan and the ``=`` separator to green.
    With the default ``join=N`` the result is the list of rendered items
    rather than a single string.
    """
    return dictToStr(d=d, k=k, v=v, join=join, joinItem=joinItem)
def fileHash(f: str, chunkSize: int = 1 << 20):
    """Return the hexadecimal MD5 digest of the file at path ``f``.

    The file is read in binary mode and fed to the hash ``chunkSize`` bytes
    at a time, so large files are never held in memory as a whole. The
    digest is the same one ``hashText(data, encode=N)`` yields for the full
    file contents (both use MD5).

    Args:
        f: Path of the file to hash.
        chunkSize: Number of bytes read per iteration.

    Returns:
        The MD5 hex digest as a ``str``.
    """
    digest = hashlib.md5()
    with open(f, 'rb') as fp:
        while chunk := fp.read(chunkSize):
            digest.update(chunk)
    return digest.hexdigest()


# Alias kept for callers that use the other word order.
hashFile = fileHash
def hashText(text, encode='UTF-8'):
    """Return the hexadecimal MD5 digest of ``text``.

    ``text`` is first encoded with the codec named by ``encode``; pass
    ``encode=N`` when ``text`` is already bytes-like and must be hashed as-is.
    """
    payload = text if encode is N else text.encode(encode)
    return hashlib.md5(payload).hexdigest()
def printLine(unit=''):
    """Return ``unit`` repeated across the width of the terminal.

    The width is taken from the second field printed by ``stty size`` minus
    one; when the command prints nothing, a width of 80 is used instead.
    """
    with subprocess.Popen('stty size', shell=T, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
        reported, _ = proc.communicate()
    if not reported:
        width = 80
    else:
        # second whitespace-separated field of the command output
        width = int(reported.split()[1].decode('UTF-8')) - 1
    return unit * width
# def pj(dump, **kargs):
# log.info(jdumps(dump, **kargs))
def stringifyDictOfList(dictOfList: dict[list]):
    """Render a mapping of string lists, one ``key (count) items...`` line per key.

    Each key is colored magenta and followed by the length of its list in
    parentheses; the list items are joined with single spaces. Leading and
    trailing whitespace of the whole result is stripped.
    """
    rows = [
        f'{cl.mag}{key} ({len(items)}){cl.reset} ' + ' '.join(items) + '\n'
        for key, items in dictOfList.items()
    ]
    return ''.join(rows).strip()
def stringToBashComment(string):
    """Prefix every line of ``string`` with ``'# '``.

    Lines are delimited by ``'\\n'`` only; an empty input yields ``'# '``.
    """
    return '\n'.join(f'# {line}' for line in string.split('\n'))
def randomString(length=30):
    """Return ``length`` random lowercase ASCII letters.

    Characters are drawn with replacement via ``random.choices``, i.e. from
    the non-cryptographic ``random`` generator.
    """
    alphabet = string.ascii_lowercase
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)
def formatByteSize(x, color=F, base=1024):
    """Format a byte count with a K/M/G/T/P suffix.

    Args:
        x: Size to format.
        color: When truthy, wrap the result in a per-unit color.
        base: Divisor between units; must be 1024 or 1000.

    Returns:
        ``'<n> '`` (no decimals, trailing space) below one ``base``,
        otherwise ``'<n.nn><unit>'`` with thousands separators.
    """
    assert base in [1024, 1000]
    units = [N, 'K', 'M', 'G', 'T', 'P']
    # NOTE(review): other functions in this file use ``cl.wht`` and ``cl.r``;
    # confirm that ``cl.wit`` and ``cl.res`` exist as well.
    colors = ['', cl.cyn, cl.yel, cl.mag, cl.grn, cl.wit]
    scale = 0
    # ``>=`` so an exact multiple (e.g. 1024) is promoted to the next unit, and
    # the cap keeps values beyond the largest unit from indexing past the table.
    while x >= base and scale < len(units) - 1:
        x = x / base
        scale += 1
    unit = units[scale]
    if unit:
        text = f'{x:,.2f}{unit}'
    else:
        text = f'{x:,.0f} '
    if color:
        text = f'{colors[scale]}{text}{cl.res}'
    return text
def stringToInt(theStr):
    """Map ``theStr`` to the integer value of its ``hashText`` hex digest."""
    hexDigest = hashText(theStr)
    return int(hexDigest, base=16)
def prettyFormat(obj, sep=' ', indent=0):
    """Recursively render ``obj`` as a colored, indented tree.

    Each node is one line: the type name in green followed by a yellow
    summary. Mappings, and iterables with a non-zero length, are summarized
    as ``len=<n> types={...}``; everything else is shown via its own string
    form. Children (mapping items, object attributes obtained through
    ``Obj.getObjAsDict``, or iterable elements) follow on their own lines,
    prefixed with ``sep`` repeated once per nesting level and a cyan
    ``[key]`` / ``[index]`` label.

    NOTE(review): a non-empty ``str`` appears to fall into the "iterable
    with a length" case and is therefore summarized rather than printed —
    confirm this is intended.
    """
    descend = partial(prettyFormat, indent=indent + 1, sep=sep)

    def summarize(count, members):
        # "len=<n> types={...}" text shown in place of the container itself
        info = {'len': count, 'types': set(type(m).__name__ for m in members)}
        return ' '.join(f'{name}={value}' for name, value in info.items())

    summary = obj
    if isinstance(obj, Mapping):
        summary = summarize(len(obj.keys()), obj.values())
    elif isinstance(obj, Iterable):
        try:
            size = len(obj)
        except (AttributeError, TypeError):
            size = N
        if size:
            summary = summarize(size, obj)

    out = f'{cl.grn}[{type(obj).__name__}]{cl.r} {cl.yel}{summary}{cl.r}\n'

    attrNames = dir(obj)
    hasAttrs = '__slots__' in attrNames or '__dict__' in attrNames
    if hasAttrs or isinstance(obj, Mapping):
        # objects exposing attributes are walked through their dict view
        children = Obj.getObjAsDict(obj) if hasAttrs else obj
        for label, child in children.items():
            out += f'{sep * (indent + 1)}{cl.cyn}[{label}]{cl.r} '
            out += descend(child)
    elif isinstance(obj, Iterable) and not isinstance(obj, str):
        for position, child in enumerate(obj):
            out += f'{sep * (indent + 1)}{cl.cyn}[{position}]{cl.r} '
            out += descend(child)
    # TODO try __dict__ on obj / see jdumps
    return out
def stringPreAndJoin(theList, on='\n ', first=N):
    """Join the items of ``theList`` with ``on``, preceded by a prefix.

    The prefix is ``str(first)`` when ``first`` is given, otherwise ``on``
    itself, so by default every item (including the first) is preceded by
    the separator.
    """
    prefix = on if first is N else str(first)
    body = on.join(str(x) for x in theList)
    return prefix + body
# def printLocals():
# import inspect
# frame = inspect.currentframe().f_back
# log.info(prettyFormat(frame.f_locals))
def preSpaceCount(x):
    """Return the number of leading whitespace characters of ``x``.

    Newlines at either end of ``x`` are removed before counting. A string
    that is empty or consists only of whitespace yields ``None``.
    """
    if re.search(r'^\s*$', x, flags=re.I):
        return None
    leading = re.search(r'(^\s*)', x.strip('\n'), flags=re.I)
    return leading.end()
def formatMultiLineStr(
    string,
    split: str = '\n',
    join: str = '\n',
    filt: Callable = N,
    dedent: bool = F,
):
    """Split a multi-line string, optionally dedent and filter it, then re-join.

    Args:
        string: Text to process.
        split: Delimiter used to break ``string`` into lines.
        join: Delimiter used to re-join the lines; ``N`` or ``F`` returns
            the list of lines instead.
        filt: Predicate selecting the lines to keep; ``T`` keeps the
            non-empty lines, ``N`` keeps everything.
        dedent: Remove the smallest indentation found on any non-blank line
            from every line.

    Returns:
        The processed text, or the list of lines when ``join`` is ``N``/``F``.
    """
    lines = string.split(split)
    if dedent:
        # preSpaceCount gives None for blank lines only. An indent of 0 is a
        # real measurement and must take part in the minimum, otherwise lines
        # that start at column 0 would lose their first characters.
        indents = [n for n in map(preSpaceCount, lines) if n is not None]
        # default=0: nothing to remove when every line is blank
        common = min(indents, default=0)
        lines = [x[common:] for x in lines]
    if filt is T:
        filt = bool
    if filt is not N:
        lines = [x for x in lines if filt(x)]
    if join is not N and join is not F:
        return join.join(lines)
    return lines
# Text-like values accepted by the diff helpers below.
Chars = str | bytes | bytearray
# Either one text blob or an iterable of them (toListOfBytes treats the items as lines).
Diffable = Chars | Iterable[Chars]
def toListOfBytes(a):
    """Normalize ``a`` into a list of byte lines.

    * ``str`` is split on ``'\\n'`` and every line is UTF-8 encoded.
    * ``bytes`` / ``bytearray`` is split on ``b'\\n'``.
    * Any other iterable is taken as a sequence of lines; ``str`` items are
      UTF-8 encoded and all other items are passed through unchanged.
    * Anything else is returned as-is.
    """
    if isinstance(a, str):
        return [line.encode('utf8') for line in a.split('\n')]
    if isinstance(a, (bytes, bytearray)):
        return a.split(b'\n')
    if isinstance(a, Iterable):
        # Convert per item rather than peeking at a[0]: this also handles
        # empty inputs and iterables that do not support indexing.
        return [x.encode('utf8') if isinstance(x, str) else x for x in a]
    return a
def diffStrings(s1: Diffable, s2: Diffable) -> bytearray:
    """Return a colored unified diff going from ``s2`` to ``s1``.

    Both inputs are normalized with ``toListOfBytes``. The first three lines
    of the unified diff (the ``---`` / ``+++`` file header and the first
    ``@@`` hunk marker) are dropped. The leading ``-`` of removed lines is
    colored red and the leading ``+`` of added lines cyan. Every output line
    is terminated with ``b'\\n'``.

    Returns:
        The diff as a ``bytearray``; empty when the inputs are identical.
    """
    new = toListOfBytes(s1)
    old = toListOfBytes(s2)
    # lineterm=b'': the input lines carry no newline, so the control lines must
    # not either — otherwise later "@@" markers are followed by a blank line
    # once the uniform b'\n' is appended below.
    diff = list(difflib.diff_bytes(difflib.unified_diff, old, new, lineterm=b''))
    diff = diff[3:]  # | remove patch header
    red, cyn, reset = (str(code).encode() for code in (cl.red, cl.cyn, cl.r))
    out = bytearray()
    for line in diff:
        marker = line[0:1]
        if marker == b'-':
            line = red + marker + reset + line[1:]
        elif marker == b'+':
            line = cyn + marker + reset + line[1:]
        out += line + b'\n'
    return out
def camelCase(*item: Iterable):
    """Build a lowerCamelCase identifier from words.

    Accepts a single string (split into runs of ASCII letters and digits),
    a single iterable of words, or the words as separate arguments. Words
    passed as an iterable or as separate arguments are used as given, not
    split further; empty ones are skipped.

    Returns:
        The words lower-cased and joined, each but the first with an
        upper-case initial; ``''`` when no words remain.
    """
    # ``item`` is always the *args tuple here, so a lone argument is unwrapped.
    if len(item) == 1:
        item = item[0]
    if isinstance(item, str):
        words = re.findall(r'[a-zA-Z0-9]+', item)
    else:
        words = [x for x in item if x]
    words = [w.lower() for w in words]
    if not words:
        # nothing usable (e.g. '' or '___'): avoid indexing an empty string
        return ''
    joined = ''.join(w[0].upper() + w[1:] for w in words)
    return joined[0].lower() + joined[1:]
def findInStr(pattern, string):
    """Yield a ``(start, end)`` index pair for every match of ``pattern`` in ``string``.

    Matches are found with ``re.finditer``, so they are non-overlapping and
    produced lazily in left-to-right order.
    """
    for match in re.finditer(pattern, string):
        yield match.span()