diff --git a/CHANGES.rst b/CHANGES.rst index b7339da949..b8fb2e44d4 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,15 @@ CHANGES ======= +New builtins +++++++++++++ +ByteArray +FileNames +CreateFile +CreateTemporary + + + 2.0.0 ----- diff --git a/mathics/autoload/formats/CSV/Export.m b/mathics/autoload/formats/CSV/Export.m index 6f9e973b01..6cca1dac94 100644 --- a/mathics/autoload/formats/CSV/Export.m +++ b/mathics/autoload/formats/CSV/Export.m @@ -22,7 +22,7 @@ FunctionChannels -> {"Streams"}, Options -> {"ByteOrderMark"}, DefaultElement -> "Plaintext", - BinaryFormat -> True, + BinaryFormat -> False, Options -> { "CharacterEncoding", "FieldSeparators" diff --git a/mathics/builtin/base.py b/mathics/builtin/base.py index b7c6cfdb03..48f26cbcaf 100644 --- a/mathics/builtin/base.py +++ b/mathics/builtin/base.py @@ -21,6 +21,7 @@ MachineReal, PrecisionReal, String, + ByteArrayAtom, Symbol, ensure_context, strip_context, diff --git a/mathics/builtin/files.py b/mathics/builtin/files.py index fdf1020a2b..2b15009bd6 100644 --- a/mathics/builtin/files.py +++ b/mathics/builtin/files.py @@ -19,6 +19,7 @@ import requests import pathlib +from io import BytesIO, StringIO import os.path as osp from itertools import chain @@ -37,6 +38,7 @@ SymbolFalse, SymbolNull, SymbolTrue, + SymbolInfinity, from_python, Integer, BoxError, @@ -47,10 +49,10 @@ from mathics.core.numbers import dps from mathics.builtin.base import Builtin, Predefined, BinaryOperator, PrefixOperator from mathics.builtin.numeric import Hash -from mathics.builtin.strings import to_python_encoding +from mathics.builtin.strings import to_python_encoding, to_regex from mathics.builtin.base import MessageException from mathics.settings import ROOT_DIR - +import re INITIAL_DIR = os.getcwd() HOME_DIR = osp.expanduser("~") @@ -62,6 +64,34 @@ PATH_VAR = [".", HOME_DIR, osp.join(ROOT_DIR, "data"), osp.join(ROOT_DIR, "packages")] +def create_temporary_file(suffix=None, delete=False): + if suffix=="": + suffix = None + + 
fp = tempfile.NamedTemporaryFile(delete=delete, suffix=suffix) + result = fp.name + fp.close() + return result + +def urlsave_tmp(url, location=None, **kwargs): + suffix = "" + strip_url = url.split("/") + if len(strip_url) > 3: + strip_url = strip_url[-1] + if strip_url != "": + suffix = strip_url[len(strip_url.split(".")[0]) :] + try: + r = requests.get(url, allow_redirects=True) + if location is None: + location = create_temporary_file(suffix=suffix) + with open(location, "wb") as fp: + fp.write(r.content) + result = fp.name + except Exception: + result = None + return result + + def path_search(filename): # For names of the form "name`", search for name.mx and name.m if filename[-1] == "`": @@ -81,23 +111,7 @@ def path_search(filename): or (lenfn > 8 and filename[:8] == "https://") or (lenfn > 6 and filename[:6] == "ftp://") ): - suffix = "" - strip_filename = filename.split("/") - if len(strip_filename) > 3: - strip_filename = strip_filename[-1] - if strip_filename != "": - suffix = strip_filename[len(strip_filename.split(".")[0]) :] - try: - r = requests.get(filename, allow_redirects=True) - if suffix != "": - fp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix) - else: - fp = tempfile.NamedTemporaryFile(delete=False) - fp.write(r.content) - result = fp.name - fp.close() - except Exception: - result = None + result = urlsave_tmp(filename) else: for p in PATH_VAR + [""]: path = osp.join(p, filename) @@ -1947,8 +1961,10 @@ def apply(self, channel, expr, evaluation): Expression("FullForm", result).evaluate(evaluation), ) exprs.append(result) - - stream.write("".join(exprs)) + line = "".join(exprs) + if type(stream) is BytesIO: + line = line.encode('utf8') + stream.write(line) try: stream.flush() except IOError as err: @@ -4942,3 +4958,211 @@ def apply(self, context, evaluation): return SymbolFailed return SymbolNull + + +class URLSave(Builtin): + """ +
+
'URLSave["url"]' +
Save "url" in a temporary file. +
'URLSave["url", $filename$]' +
Save "url" in $filename$. +
+ """ + messages = {"invfile": '`1` is not a valid Filename', + "invhttp": '`1` is not a valid URL' + } + def apply_1(self, url, evaluation, **options): + 'URLSave[url_String, OptionsPattern[URLSave]]' + return self.apply_2(url, None, evaluation, **options) + + def apply_2(self, url, filename, evaluation, **options): + 'URLSave[url_String, filename_, OptionsPattern[URLSave]]' + url = url.value + if filename is None: + result = urlsave_tmp(url, None, **options) + elif filename.get_head_name()=="String": + filename = filename.value + result = urlsave_tmp(url, filename, **options) + else: + evaluation.message("URLSave", "invfile", filename) + return SymbolFailed + if result is None: + return SymbolFailed + return String(result) + + +class CreateFile(Builtin): + """ +
+
'CreateFile["filename"]' +
Creates a file named "filename", but does not open it. +
'CreateFile[]' +
Creates a temporary file, but does not open it. +
+ """ + rules = {'CreateFile[]':'CreateTemporary[]',} + options = {'CreateIntermediateDirectories': 'True', + 'OverwriteTarget': 'True', + } + + def apply_1(self, filename, evaluation, **options): + 'CreateFile[filename_String, OptionsPattern[CreateFile]]' + try: + # TODO: Implement options + if not osp.isfile(filename.value): + f = open(filename.value, "w") + res = f.name + f.close() + return String(res) + else: + return filename + except: + return SymbolFailed + +class CreateTemporary(Builtin): + """ +
+
'CreateTemporary[]' +
Creates a temporary file, but does not open it. +
+ """ + def apply_0(self, evaluation): + 'CreateTemporary[]' + try: + res = create_temporary_file() + except: + return SymbolFailed + return String(res) + + +class FileNames(Builtin): + """ +
+
'FileNames[]' +
Returns a list with the filenames in the current working folder. +
'FileNames[$form$]' +
Returns a list with the filenames in the current working folder that match $form$. +
'FileNames[{$form_1$, $form_2$, $\ldots$}]' +
Returns a list with the filenames in the current working folder that match one of $form_1$, $form_2$, $\ldots$. +
'FileNames[{$form_1$, $form_2$, $\ldots$},{$dir_1$, $dir_2$, $\ldots$}]' +
Looks into the directories $dir_1$, $dir_2$, $\ldots$. +
'FileNames[{$form_1$, $form_2$, $\ldots$},{$dir_1$, $dir_2$, $\ldots$}]' +
Looks into the directories $dir_1$, $dir_2$, $\ldots$. +
'FileNames[$forms$, $dirs$, $n$]' +
Looks for files up to level $n$. +
+ + >> SetDirectory[$InstallationDirectory <> "/autoload"]; + >> FileNames[]//Length + = 2 + >> FileNames["*.m", "formats"]//Length + = 0 + >> FileNames["*.m", "formats", 3]//Length + = 12 + >> FileNames["*.m", "formats", Infinity]//Length + = 12 + """ + fmtmaps = {Symbol("System`All"): "*" } + options = {"IgnoreCase": "Automatic",} + + messages = { + "nofmtstr" : "`1` is not a format or a list of formats.", + "nodirstr" : "`1` is not a directory name or a list of directory names.", + "badn" : "`1` is not an integer number.", + } + + def apply_0(self, evaluation, **options): + '''FileNames[OptionsPattern[FileNames]]''' + return self.apply_3(String("*"), String(os.getcwd()), None, evaluation, **options) + + def apply_1(self, forms, evaluation, **options): + '''FileNames[forms_, OptionsPattern[FileNames]]''' + return self.apply_3(forms, String(os.getcwd()), None, evaluation, **options) + + def apply_2(self, forms, paths, evaluation, **options): + '''FileNames[forms_, paths_, OptionsPattern[FileNames]]''' + return self.apply_3(forms, paths, None, evaluation, **options) + + def apply_3(self, forms, paths, n, evaluation, **options): + '''FileNames[forms_, paths_, n_, OptionsPattern[FileNames]]''' + filenames = set() + # Building a list of forms + if forms.get_head_name() == "System`List": + str_forms = [] + for p in forms._leaves: + if self.fmtmaps.get(p, None): + str_forms.append(self.fmtmaps[p]) + else: + str_forms.append(p) + else: + str_forms = [self.fmtmaps[forms] + if self.fmtmaps.get(forms, None) + else forms] + # Building a list of directories + if paths.get_head_name() == "System`String": + str_paths = [paths.value] + elif paths.get_head_name() == "System`List": + str_paths = [] + for p in paths._leaves: + if p.get_head_name() == "System`String": + str_paths.append(p.value) + else: + evaluation.message("FileNames", "nodirstr", paths) + return + else: + evaluation.message("FileNames", "nodirstr", paths) + return + + if n is not None: + if n.get_head_name() == 
"System`Integer": + n = n.get_int_value() + elif n.get_head_name() == "System`DirectedInfinity": + n = None + else: + print(n) + evaluation.message("FileNames", "badn", n) + return + else: + n = 1 + + # list the files + if options.get('System`IgnoreCase', None) == SymbolTrue: + patterns = [re.compile("^" + + to_regex(p, evaluation, + abbreviated_patterns=True), + re.IGNORECASE)+"$" + for p in str_forms] + else: + patterns = [re.compile("^" + + to_regex(p, + evaluation, + abbreviated_patterns=True) + + "$") for p in str_forms] + + for path in str_paths: + if not osp.isdir(path): + continue + if n == 1: + for fn in os.listdir(path): + fullname = osp.join(path, fn) + for pattern in patterns: + if pattern.match(fn): + filenames.add(fullname) + break + else: + pathlen = len(path) + for root, dirs, files in os.walk(path): + # FIXME: This is an ugly and inefficient way + # to avoid looking deeper than the level n, but I do not realize + # how to do this better without a lot of code... + if n is not None and len(root[pathlen:].split(osp.sep))>n: + continue + for fn in files+dirs: + for pattern in patterns: + if pattern.match(fn): + filenames.add(osp.join(root,fn)) + break + + + return Expression("List", *[String(s) for s in filenames]) diff --git a/mathics/builtin/graphics.py b/mathics/builtin/graphics.py index c2ce6ba4cf..f69ae27807 100644 --- a/mathics/builtin/graphics.py +++ b/mathics/builtin/graphics.py @@ -3224,7 +3224,7 @@ def boxes_to_xml(self, leaves=None, **options): ) return ( - '' + '' % ( int(width), int(height), diff --git a/mathics/builtin/importexport.py b/mathics/builtin/importexport.py index bb384486e0..572792ddb8 100644 --- a/mathics/builtin/importexport.py +++ b/mathics/builtin/importexport.py @@ -7,7 +7,8 @@ from mathics.version import __version__ # noqa used in loading to check consistency. 
from mathics.core.expression import Expression, from_python, strip_context, Symbol, SymbolFailed -from mathics.builtin.base import Builtin, Predefined, String, Integer, get_option +from mathics.builtin.base import Builtin, Predefined, String, ByteArrayAtom, Integer, get_option +from mathics.builtin.options import options_to_rules from .pymimesniffer import magic import mimetypes @@ -1206,10 +1207,9 @@ def apply_elements(self, expr, elems, evaluation, options={}): # Load the exporter exporter_symbol, exporter_options = EXPORTERS[format_spec[0]] function_channels = exporter_options.get("System`FunctionChannels") - stream_options, custom_options = _importer_exporter_options( exporter_options.get("System`Options"), options, "System Options", evaluation) - + is_binary = exporter_options["System`BinaryFormat"].is_true() if function_channels is None: evaluation.message('ExportString', 'emptyfch') evaluation.predetermined_out = current_predetermined_out @@ -1217,29 +1217,39 @@ def apply_elements(self, expr, elems, evaluation, options={}): elif function_channels == Expression('List', String('FileNames')): # Generates a temporary file import tempfile - tmpfile = tempfile.NamedTemporaryFile(dir=tempfile.gettempdir()) + tmpfile = tempfile.NamedTemporaryFile(dir=tempfile.gettempdir(), suffix="." 
+ format_spec[0].lower()) filename = String(tmpfile.name) tmpfile.close() exporter_function = Expression( exporter_symbol, filename, expr, *list(chain(stream_options, custom_options))) - if exporter_function.evaluate(evaluation) != Symbol('Null'): + exportres = exporter_function.evaluate(evaluation) + if exportres != Symbol('Null'): evaluation.predetermined_out = current_predetermined_out return SymbolFailed else: try: - tmpstream = open(filename.value, 'rb') - res = tmpstream.read().decode('utf-8') + if is_binary: + tmpstream = open(filename.value, 'rb') + else: + tmpstream = open(filename.value, 'r') + res = tmpstream.read() tmpstream.close() except Exception as e: print("something went wrong") print(e) evaluation.predetermined_out = current_predetermined_out return SymbolFailed - res = String(str(res)) + if is_binary: + res = Expression("ByteArray", ByteArrayAtom(res)) + else: + res = String(str(res)) elif function_channels == Expression('List', String('Streams')): - from io import StringIO + from io import StringIO, BytesIO from mathics.builtin.files import STREAMS, NSTREAMS - pystream = StringIO() + if is_binary: + pystream = BytesIO() + else: + pystream = StringIO() n = next(NSTREAMS) STREAMS.append(pystream) stream = Expression('OutputStream', String('String'), Integer(n)) @@ -1247,7 +1257,10 @@ def apply_elements(self, expr, elems, evaluation, options={}): exporter_symbol, stream, expr, *list(chain(stream_options, custom_options))) res = exporter_function.evaluate(evaluation) if res == Symbol('Null'): - res = String(str(pystream.getvalue())) + if is_binary: + res = Expression("ByteArray", ByteArrayAtom(pystream.getvalue())) + else: + res = String(str(pystream.getvalue())) else: res = Symbol("$Failed") Expression('Close', stream).evaluate(evaluation) @@ -1393,6 +1406,8 @@ def apply(self, expr, evaluation): 'System`Convert`B64Dump`B64Encode[expr_]' if isinstance(expr,String): stringtocodify = expr.get_string_value() + elif expr.get_head_name() == "ByteArray": 
+ return String(base64.b64encode(expr._leaves[0].value).decode('utf8')) else: stringtocodify = Expression('ToString',expr).evaluate(evaluation).get_string_value() return String(base64.b64encode(bytearray(stringtocodify, 'utf8')).decode('utf8')) diff --git a/mathics/builtin/lists.py b/mathics/builtin/lists.py index 48e6e6bb2e..314e7110d5 100644 --- a/mathics/builtin/lists.py +++ b/mathics/builtin/lists.py @@ -28,6 +28,7 @@ from mathics.core.expression import ( Expression, String, + ByteArrayAtom, Symbol, SymbolFailed, SymbolNull, @@ -177,6 +178,43 @@ def find_matching_indices_with_levelspec(expr, pattern, evaluation, levelspec=1, return found +class ByteArray(Builtin): + """ +
+
'ByteArray[{$b_1$, $b_2$, $\ldots$}]' +
Represents a sequence of Bytes $b_1$, $b_2$, $\ldots$ +
'ByteArray["string"]' +
Constructs a byte array whose bytes come from decoding a Base64-encoded string. +
+ + >> A=ByteArray[{1,25,3}] + = ByteArray["ARkD"] + >> A[[2]] + = 25 + >> B=ByteArray["asy"] + = ByteArray["WVhONQ=="] + """ + + messages = {'aotd': 'Elements in `1` are inconsistent with type Byte', + 'lend': 'The first argument in Bytearray[`1`] should ' + \ + 'be a B64 enconded string or a vector of integers',} + + def apply_str(self, string, evaluation): + 'ByteArray[string_String]' + return Expression("ByteArray", ByteArrayAtom(string.value)) + + def apply_list(self, values, evaluation): + 'ByteArray[values_List]' + if not values.has_form('List', None): + return + try: + ba = bytearray([b.get_int_value() for b in values._leaves]) + except: + evaluation.message("ByteArray", 'aotd' , values) + return + return Expression("ByteArray", ByteArrayAtom(ba)) + + class List(Builtin): """
@@ -1027,7 +1065,38 @@ def apply(self, list, i, evaluation): "Part[list_, i___]" indices = i.get_sequence() + # How to deal with ByteArrays + if list.get_head_name() == "System`ByteArray": + if len(indices) > 1: + print("Part::partd1: Depth of object ByteArray[<3>] " + + "is not sufficient for the given part specification.") + return + idx = indices[0] + if idx.get_head_name() == "System`Integer": + idx = idx.get_int_value() + if idx == 0: + return Symbol("System`ByteArray") + data = list._leaves[0].value + lendata = len(data) + if idx < 0: + idx = data - idx + if idx < 0: + evaluation.message("Part", "partw", i, list) + return + else: + idx = idx - 1 + if idx > lendata: + evaluation.message("Part", "partw", i, list) + return + return Integer(data[idx]) + if idx == Symbol("System`All"): + return list + # TODO: handling ranges and lists... + evaluation.message("Part", "notimplemented") + return + + # Otherwise... result = walk_parts([list], indices, evaluation) if result: return result @@ -5591,7 +5660,6 @@ class Delete(Builtin): messages = { "argr": "Delete called with 1 argument; 2 arguments are expected.", "argt": "Delete called with `1` arguments; 2 arguments are expected.", - "partw": "Part `1` of `2` does not exist.", "psl": "Position specification `1` in `2` is not a machine-sized integer or a list of machine-sized integers.", "pkspec": "The expression `1` cannot be used as a part specification. 
Use `2` instead.", } @@ -5602,7 +5670,7 @@ def apply_one(self, expr, position, evaluation): try: return delete_one(expr, pos) except PartRangeError: - evaluation.message("Delete", "partw", Expression("List", pos), expr) + evaluation.message("Part", "partw", Expression("List", pos), expr) def apply(self, expr, positions, evaluation): "Delete[expr_, positions___]" @@ -5640,10 +5708,10 @@ def apply(self, expr, positions, evaluation): try: newexpr = delete_rec(newexpr, pos) except PartDepthError as exc: - return evaluation.message("Delete", "partw", Integer(exc.index), expr) + return evaluation.message("Part", "partw", Integer(exc.index), expr) except PartError: return evaluation.message( - "Delete", "partw", Expression("List", *pos), expr + "Part", "partw", Expression("List", *pos), expr ) return newexpr diff --git a/mathics/core/expression.py b/mathics/core/expression.py index f0f3105d51..77b210d065 100644 --- a/mathics/core/expression.py +++ b/mathics/core/expression.py @@ -15,6 +15,7 @@ from mathics.core.numbers import get_type, dps, prec, min_prec, machine_precision from mathics.core.convert import sympy_symbol_prefix, SympyExpression +import base64 def fully_qualified_symbol_name(name) -> bool: @@ -135,6 +136,8 @@ def from_python(arg): return arg elif isinstance(arg, list) or isinstance(arg, tuple): return Expression('List', *[from_python(leaf) for leaf in arg]) + elif isinstance(arg, bytearray) or isinstance(arg, bytes): + return Expression('ByteArray', ByteArrayAtom(arg)) else: raise NotImplementedError @@ -2684,6 +2687,78 @@ def user_hash(self, update): def __getnewargs__(self): return (self.value,) +class ByteArrayAtom(Atom): + value: str + + def __new__(cls, value): + self = super().__new__(cls) + if type(value) in (bytes, bytearray): + self.value = value + elif type(value) is list: + self.value = bytearray(list) + elif type(value) is str: + self.value = base64.b64encode(bytearray(value, 'utf8')) + else: + raise Exception("value does not belongs to a valid 
type") + return self + + def __str__(self) -> str: + return '"' + base64.b64encode(self.value).decode('utf8') + '"' + + def boxes_to_text(self, **options) -> str: + return '"' + base64.b64encode(self.value).decode('utf8') + '"' + + def boxes_to_xml(self, **options) -> str: + return encode_mathml(String(base64.b64encode(self.value).decode('utf8'))) + + def boxes_to_tex(self, **options) -> str: + from mathics.builtin import builtins + return encode_tex(String(base64.b64encode(self.value).decode('utf8'))) + + def atom_to_boxes(self, f, evaluation): + return String('"' + self.__str__() + '"') + + def do_copy(self) -> 'ByteArray': + return ByteArrayAtom(self.value) + + def default_format(self, evaluation, form) -> str: + value = self.value + return value.__str__() + + def get_sort_key(self, pattern_sort=False): + if pattern_sort: + return super().get_sort_key(True) + else: + return [0, 1, self.value, 0, 1] + + def same(self, other) -> bool: + # FIX: check + if isinstance(other, ByteArrayAtom): + return self.value == other.value + return False + + def get_string_value(self) -> str: + try: + return self.value.decode('utf-8') + except: + return None + + def to_sympy(self, **kwargs): + return None + + def to_python(self, *args, **kwargs) -> str: + return self.value + + def __hash__(self): + return hash(("ByteArrayAtom", self.value)) + + def user_hash(self, update): + # hashing a String is the one case where the user gets the untampered + # hash value of the string's text. this corresponds to MMA behavior. + update(self.value) + + def __getnewargs__(self): + return (self.value,) class StringFromPython(String): def __new__(cls, value):