rtopy
Top-level package for rtopy.
"""Top-level package for rtopy."""

# Package metadata.
__author__ = """T. Moudiki"""
__email__ = "thierry.moudiki@gmail.com"

# NOTE: the string below is a bare expression statement, not the module
# docstring (only the first statement of the module is the docstring); it has
# no runtime effect.
"""rtopy: Lightweight R-Python bridge."""
# Re-export the public API from the package's submodules.
from .rtopy import callfunc
from .bridge import RBridge, call_r
from .exceptions import RExecutionError, RNotFoundError, RTypeError

__version__ = "0.2.0"
# Names exported by `from rtopy import *`.
__all__ = [
    "RBridge",
    "call_r",
    "callfunc",
    "RExecutionError",
    "RNotFoundError",
    "RTypeError",
    "__version__",
]
class RBridge:
    """Lightweight bridge for calling R functions from Python.

    Every call writes a temporary R script, runs it with
    ``Rscript --vanilla`` and exchanges arguments and results as JSON through
    the R ``jsonlite`` package.
    """

    def __init__(self, timeout: int = 300, verbose: bool = False):
        """
        Initialize R bridge.

        Parameters
        ----------
        timeout : int
            Maximum execution time in seconds (default: 300)
        verbose : bool
            Print R warnings and messages (default: False)

        Raises
        ------
        RNotFoundError
            If ``Rscript --version`` cannot be run successfully
        """
        self.timeout = timeout
        self.verbose = verbose
        self._check_r()

    def _check_r(self):
        """Verify R is available by running ``Rscript --version``.

        Raises
        ------
        RNotFoundError
            If the check times out (5 seconds), exits with a non-zero status,
            or the ``Rscript`` executable cannot be found.
        """
        try:
            subprocess.run(
                ["Rscript", "--version"],
                capture_output=True,
                check=True,
                timeout=5,
            )
        except subprocess.TimeoutExpired as exc:
            raise RNotFoundError("R check timed out") from exc
        except (subprocess.CalledProcessError, FileNotFoundError) as exc:
            raise RNotFoundError(
                "R not found. Please install R and add to PATH.\n"
                "Download from: https://cran.r-project.org/"
            ) from exc

    def call(
        self, r_code: str, r_func: str, return_type: str = "auto", **kwargs
    ) -> Any:
        """
        Call R function with automatic type conversion.

        Parameters
        ----------
        r_code : str
            R code defining the function (can include library() calls)
        r_func : str
            Function name to call
        return_type : str
            Output type: "auto", "int", "float", "str", "bool",
            "list", "dict", "numpy", "pandas", "raw"
        **kwargs
            Arguments passed to R function

        Returns
        -------
        Result converted to requested Python type

        Raises
        ------
        ValueError
            If `r_func` does not occur in `r_code`, or `return_type` is unknown
        RExecutionError
            If R script fails to execute or does not print valid JSON
        RTypeError
            If type conversion fails

        Examples
        --------
        >>> rb = RBridge()
        >>>
        >>> # Simple calculation
        >>> code = "add <- function(x, y) x + y"
        >>> rb.call(code, "add", x=5, y=3)
        8.0
        >>>
        >>> # Statistical summary
        >>> code = '''
        ... summarize <- function(x) {
        ...     list(mean=mean(x), sd=sd(x), n=length(x))
        ... }
        ... '''
        >>> rb.call(code, "summarize", return_type="dict", x=[1,2,3,4,5])
        {'mean': 3.0, 'sd': 1.58..., 'n': 5}
        """
        # Cheap sanity check only: this is a substring test, not an R parse.
        if r_func not in r_code:
            raise ValueError(f"Function '{r_func}' not found in r_code")

        # Convert Python inputs to R-compatible format
        r_args = self._serialize_args(kwargs)

        # Build and execute R script
        r_script = self._build_script(r_code, r_func, r_args)
        output = self._execute_r(r_script)

        # Parse and convert output
        try:
            parsed = json.loads(output)
        except json.JSONDecodeError as e:
            raise RExecutionError(f"Invalid JSON from R: {output[:200]}") from e

        return self._convert_output(parsed, return_type)

    def _serialize_args(self, kwargs: Dict) -> str:
        """Convert Python args to a JSON string safe to embed in R source.

        NumPy arrays, NumPy scalars, pandas DataFrames and pandas Series are
        turned into plain Python containers/scalars first; every other value
        is handed to ``json.dumps`` unchanged.

        The result is escaped (backslashes, then single quotes) because
        `_build_script` places it inside a single-quoted R string literal.
        """
        converted = {}

        for k, v in kwargs.items():
            if HAS_NUMPY and isinstance(v, np.ndarray):
                converted[k] = v.tolist()
            elif HAS_NUMPY and isinstance(v, np.generic):
                # NumPy scalars (e.g. np.int64) are not JSON serializable.
                converted[k] = v.item()
            elif HAS_PANDAS and isinstance(v, pd.DataFrame):
                converted[k] = v.to_dict("list")
            elif HAS_PANDAS and isinstance(v, pd.Series):
                converted[k] = v.tolist()
            else:
                converted[k] = v

        json_str = json.dumps(converted)
        return json_str.replace("\\", "\\\\").replace("'", "\\'")

    def _build_script(self, r_code: str, r_func: str, r_args: str) -> str:
        """Build the R script that defines, calls and serializes `r_func`.

        The script evaluates `r_code`, decodes `r_args` with
        ``jsonlite::fromJSON``, invokes `r_func` through ``do.call`` and
        prints the result as JSON on stdout. ``{{``/``}}`` below are f-string
        escapes for literal R braces, and ``"\\\\n"`` reaches R as ``"\\n"``.
        """
        return f"""
suppressPackageStartupMessages({{
    {r_code.strip()}

    args <- jsonlite::fromJSON('{r_args}')

    result <- tryCatch(
        do.call({r_func}, args),
        error = function(e) stop("R error in {r_func}: ", e$message)
    )

    json_out <- jsonlite::toJSON(
        result,
        auto_unbox = TRUE,
        force = TRUE,
        digits = 15,
        null = "null",
        na = "null",
        dataframe = "columns"
    )

    cat(json_out, "\\n")
}})
"""

    def _execute_r(self, script: str) -> str:
        """Execute R script and return its stripped stdout.

        The script is written to a temporary ``.R`` file that is removed
        afterwards regardless of the outcome.

        Raises
        ------
        RExecutionError
            If R exits with a non-zero status, prints nothing on stdout, or
            runs longer than ``self.timeout`` seconds.
        """
        # delete=False: the file must outlive the `with` block so that the
        # Rscript subprocess can open it; it is unlinked in `finally`.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".R", delete=False, encoding="utf-8"
        ) as f:
            f.write(script)
            temp_file = f.name

        try:
            proc = subprocess.run(
                ["Rscript", "--vanilla", temp_file],
                capture_output=True,
                text=True,
                timeout=self.timeout,
            )

            if self.verbose and proc.stderr:
                print(f"[R messages] {proc.stderr}")

            if proc.returncode != 0:
                error_msg = proc.stderr or proc.stdout
                raise RExecutionError(f"R script failed:\n{error_msg}")

            if not proc.stdout.strip():
                raise RExecutionError("R produced no output")

            return proc.stdout.strip()

        except subprocess.TimeoutExpired as exc:
            raise RExecutionError(
                f"R execution timed out after {self.timeout}s"
            ) from exc
        finally:
            # Best-effort cleanup: a failed unlink must not mask the result
            # or the original error.
            try:
                os.unlink(temp_file)
            except OSError:
                pass

    def _convert_output(self, parsed: Any, return_type: str) -> Any:
        """Convert parsed JSON to requested Python type.

        Raises
        ------
        ValueError
            If `return_type` is not a known option
        RTypeError
            If the selected converter raises
        """
        if return_type == "raw":
            return parsed

        if return_type == "auto":
            return_type = self._infer_type(parsed)
            # `_infer_type` answers "raw" for values it does not recognize
            # (e.g. JSON null); those are handed back untouched below only if
            # a converter exists, so "raw" here ends in the ValueError branch
            # exactly as before.

        converters = {
            "int": self._to_int,
            "float": self._to_float,
            "str": self._to_str,
            "bool": self._to_bool,
            "list": self._to_list,
            "dict": self._to_dict,
            "numpy": self._to_numpy,
            "pandas": self._to_pandas,
        }

        if return_type not in converters:
            raise ValueError(
                f"Unknown return_type '{return_type}'. "
                f"Valid options: {', '.join(converters.keys())}, raw, auto"
            )

        try:
            return converters[return_type](parsed)
        except Exception as e:
            raise RTypeError(
                f"Failed to convert to {return_type}: {str(e)}"
            ) from e

    def _infer_type(self, parsed: Any) -> str:
        """Automatically infer best return type for a parsed JSON value.

        A dict is treated as a data frame only when every value is a list and
        all those lists share one length; a ragged dict of lists stays a
        plain ``"dict"`` because it cannot be turned into a DataFrame.
        """
        if isinstance(parsed, dict):
            columns = list(parsed.values())
            looks_tabular = all(isinstance(col, list) for col in columns) and (
                len({len(col) for col in columns}) <= 1
            )
            if looks_tabular:
                return "pandas" if HAS_PANDAS else "dict"
            return "dict"
        elif isinstance(parsed, list):
            # Check if it's a numeric array
            if all(isinstance(x, (int, float)) for x in parsed):
                return "numpy" if HAS_NUMPY else "list"
            return "list"
        elif isinstance(parsed, bool):
            # Must precede the int check: bool is a subclass of int.
            return "bool"
        elif isinstance(parsed, int):
            return "int"
        elif isinstance(parsed, float):
            return "float"
        elif isinstance(parsed, str):
            return "str"
        return "raw"

    def _to_int(self, val: Any) -> int:
        """Convert a number or a one-element list to ``int``."""
        if isinstance(val, (int, float)):
            return int(val)
        if isinstance(val, list) and len(val) == 1:
            return int(val[0])
        raise RTypeError(f"Cannot convert {type(val).__name__} to int")

    def _to_float(self, val: Any) -> float:
        """Convert a number or a one-element list to ``float``."""
        if isinstance(val, (int, float)):
            return float(val)
        if isinstance(val, list) and len(val) == 1:
            return float(val[0])
        raise RTypeError(f"Cannot convert {type(val).__name__} to float")

    def _to_str(self, val: Any) -> str:
        """Convert to ``str``; a one-element list yields its element."""
        if isinstance(val, str):
            return val
        if isinstance(val, list) and len(val) == 1:
            return str(val[0])
        return str(val)

    def _to_bool(self, val: Any) -> bool:
        """Convert to ``bool``; a one-element list yields its element."""
        if isinstance(val, bool):
            return val
        if isinstance(val, list) and len(val) == 1:
            return bool(val[0])
        return bool(val)

    def _to_list(self, val: Any) -> List:
        """Convert to ``list``: dicts give their values, scalars are wrapped."""
        if isinstance(val, list):
            return val
        if isinstance(val, dict):
            return list(val.values())
        return [val]

    def _to_dict(self, val: Any) -> Dict:
        """Convert to ``dict``: lists are keyed by their stringified index."""
        if isinstance(val, dict):
            return val
        if isinstance(val, list):
            return {str(i): v for i, v in enumerate(val)}
        return {"result": val}

    def _to_numpy(self, val: Any):
        """Convert to NumPy array.

        Lists (flat or nested) go straight to ``np.array``. A dict whose
        values are all lists is stacked and transposed so that each dict
        entry becomes a column; any other dict becomes an array of its
        values. Scalars become a one-element array.
        """
        if not HAS_NUMPY:
            raise RTypeError(
                "NumPy not installed. Install with: pip install numpy"
            )

        if isinstance(val, list):
            return np.array(val)
        if isinstance(val, dict):
            columns = list(val.values())
            if all(isinstance(col, list) for col in columns):
                return np.array(columns).T
            return np.array(columns)
        return np.array([val])

    def _to_pandas(self, val: Any):
        """Convert to pandas DataFrame or Series."""
        if not HAS_PANDAS:
            raise RTypeError(
                "pandas not installed. Install with: pip install pandas"
            )

        if isinstance(val, dict):
            # Dict of lists -> DataFrame
            if all(isinstance(v, list) for v in val.values()):
                return pd.DataFrame(val)
            # Dict of scalars -> Series
            return pd.Series(val)
        if isinstance(val, list):
            # Check if matrix (list of lists)
            if val and isinstance(val[0], list):
                return pd.DataFrame(val)
            return pd.Series(val)
        raise RTypeError(f"Cannot convert {type(val).__name__} to pandas")
Lightweight bridge for calling R functions from Python.
def call_r(r_code: str, r_func: str, **kwargs) -> Any:
    """
    Run a single R function call through a throwaway `RBridge`.

    A new `RBridge` is constructed with its default settings on every
    invocation and all arguments are forwarded to `RBridge.call`.

    Parameters
    ----------
    r_code : str
        R source that defines the function
    r_func : str
        Name of the function to invoke
    **kwargs
        Keyword arguments forwarded to `RBridge.call` (and from there to the
        R function)

    Returns
    -------
    Whatever `RBridge.call` returns for these arguments

    Examples
    --------
    >>> result = call_r("square <- function(x) x^2", "square", x=5)
    >>> print(result)  # 5 squared
    """
    return RBridge().call(r_code, r_func, **kwargs)
Quick wrapper for one-off R function calls.
Parameters
r_code : str R code defining the function r_func : str Function name to call **kwargs Arguments to pass to the function
Returns
Result with automatic type inference
Examples
>>> result = call_r("square <- function(x) x^2", "square", x=5)
>>> print(result) # 25.0
@lru_cache  # default maxsize: up to 128 distinct calls are memoized
def callfunc(
    r_code="my_func <- function() {{set.seed(1); rnorm(1)}}",
    r_func="my_func",
    type_return="float",
    **kwargs,
):
    """

    `callfunc` calls an R function `r_func` defined in the R code `r_code`.

    The call `r_func(<kwargs>)` is appended to `r_code`, every space and
    newline is stripped, and the resulting one-liner is run with
    `Rscript -e`. The text R prints on stdout is then parsed according to
    `type_return`.

    # Parameters:

    `r_code` (str): R code to be executed for the function `r_func` to run.
    Must use double braces around function definitions and a semi-colon (';') after
    each instruction (newlines are removed before execution).

    `r_func` (str): name of the R function (defined in `r_code`) being called.

    `type_return` (str): type of function return. One of "int", "float", "list" or "dict".
    Note that an R list is equivalent to a Python dict. An R vector is equivalent to a Python list.
    An R matrix will be transformed to a Python list of lists.

    `kwargs`: additional (named!) parameters to be passed to R function specified in `r_code`.
    Only `int`, `float`, `list` and `str` values are forwarded; values of any
    other type are silently dropped.

    # Returns:

    The parsed R output in the requested type, or `None` when `type_return`
    is not one of the supported options.

    # Raises:

    `AssertionError` if `r_func` does not occur in `r_code`.

    `subprocess.CalledProcessError` if `Rscript` exits with a non-zero status.

    `ValueError` if `type_return="list"` and the R output is recognized
    neither as a vector nor as a matrix.

    NOTE(review): results are memoized with `lru_cache`, which requires every
    argument to be hashable -- a `list` keyword value therefore raises
    `TypeError` before this body runs; confirm whether list arguments are
    meant to be supported.

    # Example:

    ```python
    import rtopy as rp

    # an R function that returns the product of an arbitrary number of arguments
    # notice the double braces around the R function's code
    # and the semi-colon (';') after each instruction
    r_code1 = '''my_func <- function(arg1=NULL, arg2=NULL, arg3=NULL, arg4=NULL, arg5=NULL)
                {{
                    args <- c(arg1, arg2, arg3, arg4, arg5);
                    args <- args[!sapply(args, is.null)];
                    result <- prod(args);
                    return(result)
                }}'''

    print(rp.callfunc(r_code=r_code1, r_func="my_func", type_return="int",
                      arg1=3, arg2=5, arg3=2))
    ```

    See also [https://github.com/Techtonique/rtopy/blob/main/rtopy/demo/thierrymoudiki_20240304_rtopyintro.ipynb](https://github.com/Techtonique/rtopy/blob/main/rtopy/demo/thierrymoudiki_20240304_rtopyintro.ipynb)

    """

    # Explicit raise instead of `assert` so the check survives `python -O`;
    # the exception type is kept for callers that already catch it.
    if r_func not in r_code:
        raise AssertionError(f"Function {r_func} not found in your `r_code`")

    # Constructing argument string for the R function call
    arg_parts = []
    for key, value in kwargs.items():
        if isinstance(value, (float, int, list)):
            arg_parts.append(f"{key}={value}")
        elif isinstance(value, str):
            arg_parts.append(f"{key}={format_value(value)}")
    arg_string = ", ".join(arg_parts)

    r_code_ = f"{r_code};{r_func}({arg_string})"
    # The whole program is passed as one `-e` expression, hence no whitespace.
    r_code_ = r_code_.replace(" ", "").replace("\n", "")
    result = subprocess.run(
        ["Rscript", "-e", r_code_], capture_output=True, text=True, check=True
    ).stdout

    type_result = None
    if type_return in ("int", "float", "list"):
        if is_vector(result):
            type_result = "vector"
            # Keep the last printed line and drop R's "[1] " index prefix.
            result = result.split("\n")[-2].strip().replace("[1] ", "")
        elif is_matrix(result):
            type_result = "matrix"
            result = str_to_matrix(result)

    if type_return == "float":
        return float(result)

    if type_return == "int":
        return int(result)

    if type_return == "list":
        if type_result == "vector":
            return [float(elt) for elt in result.split(" ")]
        if type_result == "matrix":
            return result
        raise ValueError(
            "R output could not be parsed as a vector or a matrix"
        )

    if type_return == "dict":
        keys = extract_elements_with_pattern(extract_pattern(result))
        # Dictionary collecting one parsed value per R list element
        result_dict = {}
        r_list_sections = split_string(result)
        r_list_values = remove_elements_with_pattern(r_list_sections)
        for idx, key in enumerate(keys):
            # Best effort: elements that cannot be parsed are left out.
            try:
                section = r_list_values[idx]
                if is_vector(section):
                    result_dict[key] = str_to_vector(section)
                elif is_matrix(section):
                    result_dict[key] = str_to_matrix(section)
            except Exception:
                continue
        return result_dict

    return None
callfunc calls an R function r_func defined in an R code r_code.
Parameters:
r_code (str): R code to be executed for the function r_func to run.
Must use double braces around function definitions and a semi-colon (';') after
each instruction.
r_func (str): name of the R function (defined in r_code) being called.
type_return (str): type of function return. One of "int", "float", "list" or "dict".
Note that an R list is equivalent to a Python dict. An R vector is equivalent to a Python list.
An R matrix will be transformed to a Python list of lists.
kwargs: additional (named!) parameters to be passed to R function specified in r_code
Example:
import rtopy as rp
# an R function that returns the product of an arbitrary number of arguments
# notice the double braces around the R function's code
# and the semi-colon (';') after each instruction
r_code1 = 'my_func <- function(arg1=NULL, arg2=NULL, arg3=NULL, arg4=NULL, arg5=NULL)
{{
args <- c(arg1, arg2, arg3, arg4, arg5);
args <- args[!sapply(args, is.null)];
result <- prod(args);
return(result)
}}'
print(rp.callfunc(r_code=r_code1, r_func="my_func", type_return="int",
arg1=3, arg2=5, arg3=2))
See also https://github.com/Techtonique/rtopy/blob/main/rtopy/demo/thierrymoudiki_20240304_rtopyintro.ipynb
Raised when R script execution fails.
Raised when R is not found in PATH.
Raised when type conversion fails.