rtopy

Top-level package for rtopy.

 1"""Top-level package for rtopy."""
 2
 3__author__ = """T. Moudiki"""
 4__email__ = "thierry.moudiki@gmail.com"
 5
 6"""rtopy: Lightweight R-Python bridge."""
 7from .rtopy import callfunc
 8from .bridge import RBridge, call_r
 9from .exceptions import RExecutionError, RNotFoundError, RTypeError
10
11__version__ = "0.2.0"
12__all__ = [
13    "RBridge",
14    "call_r",
15    "callfunc",
16    "RExecutionError",
17    "RNotFoundError",
18    "RTypeError",
19    "__version__",
20]
class RBridge:
 28class RBridge:
 29    """Lightweight bridge for calling R functions from Python."""
 30
 31    def __init__(self, timeout: int = 300, verbose: bool = False):
 32        """
 33        Initialize R bridge.
 34
 35        Parameters
 36        ----------
 37        timeout : int
 38            Maximum execution time in seconds (default: 300)
 39        verbose : bool
 40            Print R warnings and messages (default: False)
 41        """
 42        self.timeout = timeout
 43        self.verbose = verbose
 44        self._check_r()
 45
 46    def _check_r(self):
 47        """Verify R is available."""
 48        try:
 49            subprocess.run(
 50                ["Rscript", "--version"],
 51                capture_output=True,
 52                check=True,
 53                timeout=5,
 54            )
 55        except subprocess.TimeoutExpired:
 56            raise RNotFoundError("R check timed out")
 57        except (subprocess.CalledProcessError, FileNotFoundError):
 58            raise RNotFoundError(
 59                "R not found. Please install R and add to PATH.\n"
 60                "Download from: https://cran.r-project.org/"
 61            )
 62
 63    def call(
 64        self, r_code: str, r_func: str, return_type: str = "auto", **kwargs
 65    ) -> Any:
 66        """
 67        Call R function with automatic type conversion.
 68
 69        Parameters
 70        ----------
 71        r_code : str
 72            R code defining the function (can include library() calls)
 73        r_func : str
 74            Function name to call
 75        return_type : str
 76            Output type: "auto", "int", "float", "str", "bool",
 77            "list", "dict", "numpy", "pandas", "raw"
 78        **kwargs
 79            Arguments passed to R function
 80
 81        Returns
 82        -------
 83        Result converted to requested Python type
 84
 85        Raises
 86        ------
 87        RNotFoundError
 88            If R is not installed or not in PATH
 89        RExecutionError
 90            If R script fails to execute
 91        RTypeError
 92            If type conversion fails
 93
 94        Examples
 95        --------
 96        >>> rb = RBridge()
 97        >>>
 98        >>> # Simple calculation
 99        >>> code = "add <- function(x, y) x + y"
100        >>> rb.call(code, "add", x=5, y=3)
101        8.0
102        >>>
103        >>> # Statistical summary
104        >>> code = '''
105        ... summarize <- function(x) {
106        ...     list(mean=mean(x), sd=sd(x), n=length(x))
107        ... }
108        ... '''
109        >>> rb.call(code, "summarize", return_type="dict", x=[1,2,3,4,5])
110        {'mean': 3.0, 'sd': 1.58..., 'n': 5}
111        """
112        if r_func not in r_code:
113            raise ValueError(f"Function '{r_func}' not found in r_code")
114
115        # Convert Python inputs to R-compatible format
116        r_args = self._serialize_args(kwargs)
117
118        # Build and execute R script
119        r_script = self._build_script(r_code, r_func, r_args)
120        output = self._execute_r(r_script)
121
122        # Parse and convert output
123        try:
124            parsed = json.loads(output)
125        except json.JSONDecodeError as e:
126            raise RExecutionError(f"Invalid JSON from R: {output[:200]}") from e
127
128        return self._convert_output(parsed, return_type)
129
130    def _serialize_args(self, kwargs: Dict) -> str:
131        """Convert Python args to R-compatible JSON."""
132        converted = {}
133
134        for k, v in kwargs.items():
135            if HAS_NUMPY and isinstance(v, np.ndarray):
136                converted[k] = v.tolist()
137            elif HAS_PANDAS and isinstance(v, pd.DataFrame):
138                converted[k] = v.to_dict("list")
139            elif HAS_PANDAS and isinstance(v, pd.Series):
140                converted[k] = v.tolist()
141            else:
142                converted[k] = v
143
144        json_str = json.dumps(converted)
145        return json_str.replace("\\", "\\\\").replace("'", "\\'")
146
147    def _build_script(self, r_code: str, r_func: str, r_args: str) -> str:
148        """Build R script with error handling."""
149        return f"""
150suppressPackageStartupMessages({{
151    {r_code.strip()}
152    
153    args <- jsonlite::fromJSON('{r_args}')
154    
155    result <- tryCatch(
156        do.call({r_func}, args),
157        error = function(e) stop("R error in {r_func}: ", e$message)
158    )
159    
160    json_out <- jsonlite::toJSON(
161        result,
162        auto_unbox = TRUE,
163        force = TRUE,
164        digits = 15,
165        null = "null",
166        na = "null",
167        dataframe = "columns"
168    )
169    
170    cat(json_out, "\\n")
171}})
172"""
173
174    def _execute_r(self, script: str) -> str:
175        """Execute R script and return stdout."""
176        with tempfile.NamedTemporaryFile(
177            mode="w", suffix=".R", delete=False, encoding="utf-8"
178        ) as f:
179            f.write(script)
180            temp_file = f.name
181
182        try:
183            proc = subprocess.run(
184                ["Rscript", "--vanilla", temp_file],
185                capture_output=True,
186                text=True,
187                timeout=self.timeout,
188            )
189
190            if self.verbose and proc.stderr:
191                print(f"[R messages] {proc.stderr}")
192
193            if proc.returncode != 0:
194                error_msg = proc.stderr or proc.stdout
195                raise RExecutionError(f"R script failed:\n{error_msg}")
196
197            if not proc.stdout.strip():
198                raise RExecutionError("R produced no output")
199
200            return proc.stdout.strip()
201
202        except subprocess.TimeoutExpired:
203            raise RExecutionError(
204                f"R execution timed out after {self.timeout}s"
205            )
206        finally:
207            try:
208                os.unlink(temp_file)
209            except Exception:
210                pass
211
212    def _convert_output(self, parsed: Any, return_type: str) -> Any:
213        """Convert parsed JSON to requested Python type."""
214        if return_type == "raw":
215            return parsed
216
217        if return_type == "auto":
218            return_type = self._infer_type(parsed)
219
220        converters = {
221            "int": self._to_int,
222            "float": self._to_float,
223            "str": self._to_str,
224            "bool": self._to_bool,
225            "list": self._to_list,
226            "dict": self._to_dict,
227            "numpy": self._to_numpy,
228            "pandas": self._to_pandas,
229        }
230
231        if return_type not in converters:
232            raise ValueError(
233                f"Unknown return_type '{return_type}'. "
234                f"Valid options: {', '.join(converters.keys())}, raw, auto"
235            )
236
237        try:
238            return converters[return_type](parsed)
239        except Exception as e:
240            raise RTypeError(
241                f"Failed to convert to {return_type}: {str(e)}"
242            ) from e
243
244    def _infer_type(self, parsed: Any) -> str:
245        """Automatically infer best return type."""
246        if isinstance(parsed, dict):
247            # Check if it looks like a dataframe (dict of lists)
248            if all(isinstance(v, list) for v in parsed.values()):
249                return "pandas" if HAS_PANDAS else "dict"
250            return "dict"
251        elif isinstance(parsed, list):
252            # Check if it's a numeric array
253            if all(isinstance(x, (int, float)) for x in parsed):
254                return "numpy" if HAS_NUMPY else "list"
255            return "list"
256        elif isinstance(parsed, bool):
257            return "bool"
258        elif isinstance(parsed, int):
259            return "int"
260        elif isinstance(parsed, float):
261            return "float"
262        elif isinstance(parsed, str):
263            return "str"
264        return "raw"
265
266    def _to_int(self, val: Any) -> int:
267        if isinstance(val, (int, float)):
268            return int(val)
269        if isinstance(val, list) and len(val) == 1:
270            return int(val[0])
271        raise RTypeError(f"Cannot convert {type(val).__name__} to int")
272
273    def _to_float(self, val: Any) -> float:
274        if isinstance(val, (int, float)):
275            return float(val)
276        if isinstance(val, list) and len(val) == 1:
277            return float(val[0])
278        raise RTypeError(f"Cannot convert {type(val).__name__} to float")
279
280    def _to_str(self, val: Any) -> str:
281        if isinstance(val, str):
282            return val
283        if isinstance(val, list) and len(val) == 1:
284            return str(val[0])
285        return str(val)
286
287    def _to_bool(self, val: Any) -> bool:
288        if isinstance(val, bool):
289            return val
290        if isinstance(val, list) and len(val) == 1:
291            return bool(val[0])
292        return bool(val)
293
294    def _to_list(self, val: Any) -> List:
295        if isinstance(val, list):
296            return val
297        if isinstance(val, dict):
298            return list(val.values())
299        return [val]
300
301    def _to_dict(self, val: Any) -> Dict:
302        if isinstance(val, dict):
303            return val
304        if isinstance(val, list):
305            return {str(i): v for i, v in enumerate(val)}
306        return {"result": val}
307
308    def _to_numpy(self, val: Any):
309        """Convert to NumPy array."""
310        if not HAS_NUMPY:
311            raise RTypeError(
312                "NumPy not installed. Install with: pip install numpy"
313            )
314
315        if isinstance(val, list):
316            # Handle matrix (list of lists)
317            if val and isinstance(val[0], list):
318                return np.array(val)
319            return np.array(val)
320        if isinstance(val, dict):
321            # Try to convert dict of lists to 2D array
322            lists = list(val.values())
323            if all(isinstance(v, list) for v in lists):
324                return np.array(lists).T
325            return np.array(list(val.values()))
326        return np.array([val])
327
328    def _to_pandas(self, val: Any):
329        """Convert to pandas DataFrame or Series."""
330        if not HAS_PANDAS:
331            raise RTypeError(
332                "pandas not installed. Install with: pip install pandas"
333            )
334
335        if isinstance(val, dict):
336            # Dict of lists -> DataFrame
337            if all(isinstance(v, list) for v in val.values()):
338                return pd.DataFrame(val)
339            # Dict of scalars -> Series
340            return pd.Series(val)
341        if isinstance(val, list):
342            # Check if matrix (list of lists)
343            if val and isinstance(val[0], list):
344                return pd.DataFrame(val)
345            return pd.Series(val)
346        raise RTypeError(f"Cannot convert {type(val).__name__} to pandas")

Lightweight bridge for calling R functions from Python.

def call_r(r_code: str, r_func: str, **kwargs) -> Any:
def call_r(r_code: str, r_func: str, **kwargs) -> Any:
    """
    Run a single R function call through a throwaway RBridge.

    A new ``RBridge`` with default settings is created for every call
    and all arguments are forwarded to its ``call`` method.

    Parameters
    ----------
    r_code : str
        R code defining the function
    r_func : str
        Function name to call
    **kwargs
        Arguments to pass to the function

    Returns
    -------
    Result with automatic type inference

    Examples
    --------
    >>> result = call_r("square <- function(x) x^2", "square", x=5)
    >>> print(result)  # 25
    """
    return RBridge().call(r_code, r_func, **kwargs)

Quick wrapper for one-off R function calls.

Parameters

r_code : str
    R code defining the function
r_func : str
    Function name to call
**kwargs
    Arguments to pass to the function

Returns

Result with automatic type inference

Examples

>>> result = call_r("square <- function(x) x^2", "square", x=5)
>>> print(result)  # 25.0
@lru_cache
def callfunc( r_code='my_func <- function() {{set.seed(1); rnorm(1)}}', r_func='my_func', type_return='float', **kwargs):
 34@lru_cache  # size is 128 'results'
 35def callfunc(
 36    r_code="my_func <- function() {{set.seed(1); rnorm(1)}}",
 37    r_func="my_func",
 38    type_return="float",
 39    **kwargs,
 40):
 41    """
 42
 43    `callfunc` calls an R function `r_func` defined in an R code `r_code`.
 44
 45    # Parameters:
 46
 47    `r_code` (str): R code to be executed for the function `r_func` to run.
 48    Must use double braces around function definitions and a semi-colon (';') after
 49    each instruction.
 50
 51    `r_func` (str): name of the R function (defined in `r_code`) being called.
 52
 53    `type_return` (str): type of function return. Either "int", "float", "list" and "dict".
 54    Remark an R list is equivalent a Python dict. An R vector is equivalent to a Python list.
 55    An R matrix will be transformed to a Python list of lists.
 56
 57    `kwargs`: additional (named!) parameters to be passed to R function specified in `r_code`
 58
 59    # Example:
 60
 61    ```python
 62    import rtopy as rp
 63
 64    # an R function that returns the product of an arbitrary number of arguments
 65    # notice the double braces around the R function's code
 66    # and the a semi-colon (';') after each instruction
 67    r_code1 = 'my_func <- function(arg1=NULL, arg2=NULL, arg3=NULL, arg4=NULL, arg5=NULL)
 68                {{
 69                    args <- c(arg1, arg2, arg3, arg4, arg5);
 70                    args <- args[!sapply(args, is.null)];
 71                    result <- prod(args);
 72                    return(result)
 73                }}'
 74
 75    print(rp.callfunc(r_code=r_code1, r_func="my_func", type_return="int",
 76    arg1=3, arg2=5, arg3=2))
 77    ```
 78
 79    See also [https://github.com/Techtonique/rtopy/blob/main/rtopy/demo/thierrymoudiki_20240304_rtopyintro.ipynb](https://github.com/Techtonique/rtopy/blob/main/rtopy/demo/thierrymoudiki_20240304_rtopyintro.ipynb)
 80
 81    """
 82
 83    assert r_func in r_code, f"Function {r_func} not found in your `r_code`"
 84
 85    r_code_ = r_code + ";"
 86
 87    # Constructing argument string for the R function call
 88    arg_string = ""
 89    for key, value in kwargs.items():
 90        if isinstance(value, (float, int, list)):
 91            arg_string += f"{key}={value}, "
 92        elif isinstance(value, str):
 93            arg_string += f"{key}={format_value(value)}, "
 94    arg_string = arg_string[:-2]  # remove last comma and trailing space
 95    r_code_ += f"{r_func}({arg_string})"
 96    r_code_ = r_code_.replace(" ", "").replace("\n", "")
 97    result = subprocess.run(
 98        ["Rscript", "-e", r_code_], capture_output=True, text=True, check=True
 99    ).stdout
100
101    if type_return in ("int", "float", "list"):
102        if is_vector(result):
103            type_result = "vector"
104            result = result.split("\n")[-2].strip().replace("[1] ", "")
105        elif is_matrix(result):
106            type_result = "matrix"
107            result = str_to_matrix(result)
108
109    if type_return == "dict":
110        keys = extract_elements_with_pattern(extract_pattern(result))
111        # Initialize an empty dictionary to store the lists
112        result_dict = {}
113        # Iterate over sections and extract key-value pairs
114        r_list_sections = split_string(result)
115        r_list_values = remove_elements_with_pattern(r_list_sections)
116        for idx, key in enumerate(keys):
117            try:
118                section = r_list_values[idx]
119                if is_vector(section):
120                    result_dict[keys[idx]] = str_to_vector(section)
121                elif is_matrix(section):
122                    result_dict[keys[idx]] = str_to_matrix(section)
123                else:
124                    continue
125            except:
126                continue
127
128    if type_return == "float":
129        return float(result)
130
131    elif type_return == "int":
132        return int(result)
133
134    elif type_return == "list":
135        if type_result == "vector":
136            return [float(elt) for elt in result.split(" ")]
137        elif type_result == "matrix":
138            return result
139
140    elif type_return == "dict":
141        return result_dict

callfunc calls an R function r_func defined in an R code r_code.

Parameters:

r_code (str): R code to be executed for the function r_func to run. Must use double braces around function definitions and a semi-colon (';') after each instruction.

r_func (str): name of the R function (defined in r_code) being called.

type_return (str): type of function return. One of "int", "float", "list" or "dict". Note that an R list is equivalent to a Python dict. An R vector is equivalent to a Python list. An R matrix will be transformed to a Python list of lists.

kwargs: additional (named!) parameters to be passed to R function specified in r_code

Example:

import rtopy as rp

# an R function that returns the product of an arbitrary number of arguments
# notice the double braces around the R function's code
# and the semi-colon (';') after each instruction
r_code1 = 'my_func <- function(arg1=NULL, arg2=NULL, arg3=NULL, arg4=NULL, arg5=NULL)
            {{
                args <- c(arg1, arg2, arg3, arg4, arg5);
                args <- args[!sapply(args, is.null)];
                result <- prod(args);
                return(result)
            }}'

print(rp.callfunc(r_code=r_code1, r_func="my_func", type_return="int",
arg1=3, arg2=5, arg3=2))

See also https://github.com/Techtonique/rtopy/blob/main/rtopy/demo/thierrymoudiki_20240304_rtopyintro.ipynb

class RExecutionError(rtopy.exceptions.RtopyError):
class RExecutionError(RtopyError):
    """Signals that running an R script failed."""

Raised when R script execution fails.

class RNotFoundError(rtopy.exceptions.RtopyError):
class RNotFoundError(RtopyError):
    """Signals that R could not be found in PATH."""

Raised when R is not found in PATH.

class RTypeError(rtopy.exceptions.RtopyError, builtins.TypeError):
class RTypeError(RtopyError, TypeError):
    """Signals that a type conversion failed."""

Raised when type conversion fails.

__version__ = '0.2.0'