defl/defl/_string_.py

235 lines
6.9 KiB
Python
Raw Normal View History

2025-03-09 09:17:53 -04:00
import base64
2024-09-11 11:14:03 -04:00
import hashlib, difflib
import random
import re
import secrets
import shlex
import string
import subprocess
from functools import partial
2025-03-09 09:17:53 -04:00
from ._ansii_ import cl
2024-09-11 11:14:03 -04:00
from ._basic_ import listFromArgsStar
from ._obj_ import Obj
from ._typing_ import *
# NOTE(review): the name looks like a typo for "square"; it is left as-is
# because other modules may import it. The literal is empty as seen here —
# confirm it was not meant to hold a glyph.
utf8_sqaure = ''
2025-03-09 09:17:53 -04:00
def code64(enc=N, dec=N):
    """Base64-encode ``enc`` or base64-decode ``dec``, both as UTF-8 text.

    At least one of the two must be given. When both are given, ``enc``
    takes precedence and ``dec`` is ignored.
    """
    assert not (enc is N and dec is N)
    if enc is not N:
        payload = enc.encode('UTF-8')
        return base64.b64encode(payload).decode('UTF-8')
    if dec is N:
        # Only reachable when assertions are disabled.
        return None
    payload = dec.encode('UTF-8')
    return base64.b64decode(payload).decode('UTF-8')
2025-03-09 09:17:53 -04:00
def cottonCandyStr(*args, first: bool = T, shellQuote: bool = F, join: str = ' ') -> str:
    """Join the arguments into one string with alternating colors.

    The arguments are normalized through ``listFromArgsStar`` and converted
    to ``str``. When ``first`` is true the leading item is rendered
    white/underlined and separated from the rest by a single space; the
    remaining items alternate magenta/cyan and are joined with ``join``.

    :param first: treat the first item as a highlighted headline.
    :param shellQuote: pass every item through ``shlex.quote`` first.
    :param join: separator placed between the non-headline items.
    :return: the colored string; empty when there are no items.
    """
    items = [str(x) for x in listFromArgsStar(*args)]
    if shellQuote:
        items = [shlex.quote(x) for x in items]
    # Only peel off a headline when there is one; indexing an empty list
    # used to raise IndexError for a call without arguments.
    if first and items:
        fir, rest = items[0], items[1:]
    else:
        fir, rest = N, items
    head = f'{cl.wht}{cl.underline}{fir}{cl.r}' if fir else ''
    other = join.join(
        f'{cl.mag if i % 2 == 0 else cl.cyn}{x.strip()}{cl.r}' for i, x in enumerate(rest)
    )
    return head + (' ' if other else '') + other
2025-03-09 09:17:53 -04:00
def dictToStr(d: dict, k='', v='', join: str = ' ', joinItem: str = '=') -> str | dict[str, str]:
    """Decorate the keys and values of ``d`` and optionally flatten them.

    Every key is prefixed with ``k`` and every value with ``v``; both are
    followed by the ``cl.r`` reset code. If ``joinItem`` is set, each pair
    becomes ``key + joinItem + value``. If ``join`` is set, the resulting
    collection is joined with it into a single string.
    """
    decorated = {}
    for rawKey, rawVal in d.items():
        decorated[f'{k}{rawKey}{cl.r}'] = f'{v}{rawVal}{cl.r}'

    result = decorated
    if joinItem is not N:
        result = [f'{key}{joinItem}{val}' for key, val in decorated.items()]
    if join is not N:
        result = join.join(result)
    return result
2025-03-09 09:17:53 -04:00
2025-04-28 14:44:03 -04:00
def colorDict(
    d: dict, k=cl.yel, v=cl.cyn, join: str = N, joinItem: str = f'{cl.grn}='
) -> str | list[str] | dict[str, str]:
    """Colorize a dict through ``dictToStr`` with default colors.

    Keys default to yellow, values to cyan and the ``=`` between them to
    green. The ``joinItem`` default is an f-string: as a plain string it
    emitted the literal text ``{cl.grn}=`` instead of a green ``=``.

    :return: whatever ``dictToStr`` returns for the given ``join`` and
        ``joinItem`` (a list of ``key=value`` strings with the defaults).
    """
    return dictToStr(d=d, k=k, v=v, join=join, joinItem=joinItem)
2024-09-11 11:14:03 -04:00
def fileHash(f: str):
    """Return the ``hashText`` digest of the raw bytes of the file at ``f``."""
    with open(f, 'br') as fp:
        content = fp.read()
        # The content is already bytes, so hashText must not encode it.
        return hashText(content, encode=N)


# Alternate spelling of the same function.
hashFile = fileHash
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
def hashText(text, encode='UTF-8'):
    """Return the hexadecimal MD5 digest of ``text``.

    ``text`` is encoded with ``encode`` first; pass ``encode=N`` when it is
    already bytes.
    """
    data = text if encode is N else text.encode(encode)
    digest = hashlib.md5(data)
    return digest.hexdigest()
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
def printLine(unit=''):
    """Return ``unit`` repeated to span the terminal width minus one column.

    The width is read from ``stty size``. When that cannot be run, prints
    nothing usable (for example when stdin is not a terminal) or reports a
    non-positive width, 80 repetitions are used instead.

    :param unit: the string to repeat; the default empty string yields ''.
    :return: ``unit`` repeated once per usable column.
    """
    fallback = 80
    try:
        # Argument list instead of a shell string: no shell is spawned for a
        # fixed command, and a missing binary surfaces as OSError.
        proc = subprocess.run(
            ['stty', 'size'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
        # Output is "<rows> <cols>"; empty or malformed output falls through
        # to the fallback via IndexError / ValueError.
        col = int(proc.stdout.split()[1].decode('UTF-8')) - 1
    except (OSError, IndexError, ValueError):
        col = fallback
    if col <= 0:
        # A reported width of 0 or 1 would otherwise yield an empty line.
        col = fallback
    return unit * col
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
# def pj(dump, **kargs):
# log.info(jdumps(dump, **kargs))
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
def stringifyDictOfList(dictOfList: dict[list]):
    """Render one line per key: the key and item count in magenta, then the items.

    The items of each list are joined with single spaces, so they must be
    strings. Leading and trailing whitespace of the whole result is stripped.
    """
    rows = [
        f'{cl.mag}{k} ({len(v)}){cl.reset} ' + ' '.join(v) + '\n'
        for k, v in dictOfList.items()
    ]
    return ''.join(rows).strip()
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
def stringToBashComment(string):
    """Prefix every line of ``string`` with ``# ``, including empty lines."""
    commented = ['# ' + line for line in string.split('\n')]
    return '\n'.join(commented)
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
def randomString(length=30):
    """Return ``length`` lowercase ASCII letters drawn with the ``random`` module."""
    letters = random.choices(string.ascii_lowercase, k=length)
    return ''.join(letters)
2024-09-11 11:14:03 -04:00
2025-03-09 09:17:53 -04:00
def formatByteSize(x, color=F, base=1024):
    """Format a byte count with a K/M/G/T/P suffix.

    Values below ``base`` are shown as a whole number followed by a space;
    larger values are scaled and shown with two decimals and a unit letter.
    A value of exactly ``base`` is reported as ``1.00K``, and values beyond
    the largest unit stay expressed in that unit instead of raising
    IndexError.

    :param x: the size to format.
    :param color: wrap the result in a per-unit color and a reset code.
    :param base: 1024 or 1000.
    :return: the formatted size.
    """
    assert base in [1024, 1000]
    units = [N, 'K', 'M', 'G', 'T', 'P']
    scale = 0
    # Stop at the last unit so very large values cannot index past the table.
    while x >= base and scale < len(units) - 1:
        x = x / base
        scale += 1
    unit = units[scale]
    if unit:
        string = f'{x:,.2f}{unit}'
    else:
        string = f'{x:,.0f} '
    if color:
        # Looked up only when requested, so plain formatting does not touch
        # color attributes at all.
        # NOTE(review): `cl.wit` / `cl.res` are spelled `cl.wht` / `cl.r`
        # elsewhere in this module — confirm both spellings exist on `cl`.
        toColor = ['', cl.cyn, cl.yel, cl.mag, cl.grn, cl.wit][scale]
        string = f'{toColor}{string}{cl.res}'
    return string
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
def stringToInt(theStr):
    """Map ``theStr`` to an integer by reading its ``hashText`` hex digest as base 16."""
    digest = hashText(theStr)
    return int(digest, base=16)
def prettyFormat(obj, sep=' ', indent=0):
    """Render ``obj`` as a colored, indented, multi-line outline.

    Every node is one line: the type name in brackets followed by the value
    itself or, for mappings and non-empty sized iterables (strings
    included), a ``len=... types=...`` summary. Children are rendered
    recursively one level deeper, each prefixed with ``sep`` repeated
    ``indent + 1`` times and its key or index in brackets.

    Objects exposing ``__slots__`` or ``__dict__`` are traversed through
    ``Obj.getObjAsDict``; other mappings through their own items; other
    non-string iterables by index.
    """

    def summarize(count, members):
        typeNames = set(type(m).__name__ for m in members)
        return f'len={count} types={typeNames}'

    if isinstance(obj, Mapping):
        label = summarize(len(obj.keys()), obj.values())
    else:
        label = obj
        if isinstance(obj, Iterable):
            try:
                size = len(obj)
            except (AttributeError, TypeError):
                # Unsized iterables keep their own string form as the label.
                size = N
            if size:
                label = summarize(len(obj), obj)

    pieces = [f'{cl.grn}[{type(obj).__name__}]{cl.r} {cl.yel}{label}{cl.r}\n']

    attrNames = dir(obj)
    isObjType = '__slots__' in attrNames or '__dict__' in attrNames
    if isObjType or isinstance(obj, Mapping):
        children = (Obj.getObjAsDict(obj) if isObjType else obj).items()
    elif isinstance(obj, Iterable) and not isinstance(obj, str):
        children = enumerate(obj)
    else:
        children = ()

    depth = indent + 1
    for tag, child in children:
        pieces.append(f'{sep * depth}{cl.cyn}[{tag}]{cl.r} ')
        pieces.append(prettyFormat(child, sep=sep, indent=depth))
    # TODO try __dict__ on obj / see jdumps
    return ''.join(pieces)
2025-03-09 09:17:53 -04:00
def stringPreAndJoin(theList, on='\n ', first=N):
    """Join the items of ``theList`` with ``on``, preceded by a prefix.

    The prefix is ``str(first)`` when ``first`` is given, otherwise ``on``.
    """
    prefix = on if first is N else str(first)
    return prefix + on.join(map(str, theList))
2024-09-11 11:14:03 -04:00
# def printLocals():
# import inspect
# frame = inspect.currentframe().f_back
# log.info(prettyFormat(frame.f_locals))
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
def preSpaceCount(x):
    """Count the leading whitespace characters of ``x``.

    Surrounding newlines are stripped before counting. Returns ``None`` for
    an empty or whitespace-only string.
    """
    if re.search(r'^\s*$', x, flags=re.I):
        return None
    leading = re.search(r'(^\s*)', x.strip('\n'), flags=re.I)
    return leading.end()
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
def formatMultiLineStr(
    string,
    split: str = '\n',
    join: str = '\n',
    filt: Callable = N,
    dedent: bool = F,
):
    """Split ``string`` into lines, optionally dedent and filter, then rejoin.

    :param split: separator used to break ``string`` into lines.
    :param join: separator used to rejoin the lines; pass ``N`` or ``F`` to
        get the list of lines back instead of a string.
    :param filt: predicate deciding which lines to keep; ``T`` keeps the
        non-empty lines.
    :param dedent: strip the smallest indentation found on any non-blank
        line from the front of every line.
    :return: the joined string, or the list of lines when ``join`` is off.
    """
    lines = string.split(split)
    if dedent:
        # preSpaceCount yields None for blank lines. Those are ignored, but
        # an indent of 0 is a real measurement: filtering on truthiness
        # dropped it and unindented lines lost their leading characters.
        indents = [n for n in (preSpaceCount(x) for x in lines) if n is not None]
        # default=0 covers input made only of blank lines (min([]) raises).
        margin = min(indents, default=0)
        lines = [x[margin:] for x in lines]
    if filt is T:
        filt = bool
    if filt is not N:
        lines = [x for x in lines if filt(x)]
    if join is not N and join is not F:
        lines = join.join(lines)
    return lines
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
# A single text-like value: str, bytes or bytearray.
Chars = str | bytes | bytearray
# One text-like value or an iterable of them; used to annotate the
# arguments of diffStrings.
Diffable = Chars | Iterable[Chars]
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
def toListOfBytes(a):
    """Normalize ``a`` into a list of byte lines.

    * ``str`` is split on newlines and each line encoded as UTF-8.
    * ``bytes`` / ``bytearray`` is split on ``b'\\n'``.
    * any other iterable is materialized into a list, encoding the ``str``
      elements as UTF-8 and passing the other elements through.
    * anything else is returned unchanged.

    The iterable branch used to reference an undefined name (``byte``) and
    raised NameError for every list input; it also indexed ``a[0]``, which
    failed on empty or non-subscriptable iterables.
    """
    if isinstance(a, str):
        return [bytes(x, 'utf8') for x in a.split('\n')]
    if isinstance(a, (bytes, bytearray)):
        return a.split(b'\n')
    if isinstance(a, Iterable):
        # Decide per element so empty, mixed and lazy iterables all work.
        return [bytes(x, 'utf8') if isinstance(x, str) else x for x in a]
    return a
2025-03-09 09:17:53 -04:00
2024-09-11 11:14:03 -04:00
def diffStrings(s1: Diffable, s2: Diffable) -> bytearray:
    """Return a colored unified diff going from ``s2`` to ``s1``.

    Both inputs are normalized with ``toListOfBytes``. The first three lines
    of the unified diff are dropped, ``-`` markers are colored red and ``+``
    markers cyan, and every line is terminated with a newline.
    """
    target = toListOfBytes(s1)
    source = toListOfBytes(s2)
    diffLines = list(difflib.diff_bytes(difflib.unified_diff, source, target))[3:]  # | remove patch header

    def paint(color, line):
        # Color only the marker character, then reset before the content.
        return str(color).encode() + line[0:1] + str(cl.r).encode() + line[1:]

    out = bytearray()
    for line in diffLines:
        marker = line[0:1]
        if marker == b'-':
            line = paint(cl.red, line)
        elif marker == b'+':
            line = paint(cl.cyn, line)
        out += line + b'\n'
    return out
2025-03-09 09:17:53 -04:00
def camelCase(*item: Iterable):
    """Build a camelCase identifier from a string or from several words.

    A single string argument is split into its runs of ASCII letters and
    digits. A single non-string argument is used as the iterable of words,
    and several arguments are used as the words themselves. The first word
    is lower-cased and every following word capitalized.

    :return: the camelCase string; '' when there are no words (this used to
        raise IndexError).
    """
    # A lone argument is always unwrapped. The previous check tested the
    # args tuple itself and was therefore always true.
    if len(item) == 1:
        item = item[0]
    if isinstance(item, str):
        words = re.findall(r'[a-zA-Z0-9]+', item)
    else:
        words = [x for x in item if x]
    lowered = [w.lower() for w in words]
    joined = ''.join(w[0].upper() + w[1:] for w in lowered)
    if not joined:
        return ''
    return joined[0].lower() + joined[1:]
2025-03-09 09:17:53 -04:00
def findInStr(pattern, string):
    """Lazily yield the ``(start, end)`` span of every match of ``pattern`` in ``string``."""
    matches = re.finditer(pattern=pattern, string=string)
    yield from (m.span() for m in matches)