x 'cd "C:\Users\....\"'; /*to change the work directory*/
%let PROCHTTP_PROXY=http://proxy.univ-lemans.fr:3128; /* to add th proxy if needed */
To be able to use R in SAS, t is often convenient to insert the -RLANG option in a SASV9.CFG (see SAS documentation ) .
Many R packages come with datasets that allow for conducting examples, while others like CASdatasets or insuranceDATA have the main purpose of making a collection of open datasets accessible. An example with R :
install.packages("insuranceData")
#install.packages("CASdatasets", repos = "http://cas.uqam.ca/pub/", type="source")
library(CASdatasets)
data(braggclaim)
data(ausprivauto0405)
library (insuranceData)
data("dataCar")
Use the same data in SAS :
PROC IML;
submit /R;
library(CASdatasets)
data(ausprivauto0405)
library (insuranceData)
data(dataOhlsson)
endsubmit;
call ImportDataSetFromR("work.AusPrivAuto0405","ausprivauto0405");
quit;
My macro accomplishes four steps to improve the previous section :
1. Imports the data "TN" from an R CRAN package to R.
2. Utilizes the PROC CONTENTS procedure to extract a list of variables and their types.
3. Generates a list of macro variables necessary for exporting to a SAS file.
4. Reads the data using DATALINES code and stores it in a SAS file.
*************************************************************;
* récupération sous SAS d'une base de données mis à disposition;
* dans un package R et exportation dans un fichier .sas ;
* de la forme data; ... datalines ;
*************************************************************;
* auteur : Martial Phélippé-Guinvarc'h;
* date : 09/09/2023;
*************************************************************;
%macro LCR(Pkg,TN);
proc iml;
pkg="&Pkg.";
tn="&TN.";
/* Assign the value of the parameter Table Name TN to a variable tn */
%include "Import submit R.sas";
/* Include the external SAS code from the "Import submit R.sas" file */
/* The following code is from the "Import submit R.sas" file */
*submit tn pkg /R;
*library (&pkg)
data(list=c("&tn"))
endsubmit;
/* THis code open the library and open the tn data */
/* to understand why %include statement is needed, see */
/* https://www.lexjansen.com/wuss/2014/76_Final_Paper_PDF.pdf */
call ImportDataSetFromR("&TN.", "&TN." );
/* Call the ImportDataSetFromR subroutine with the TN parameter twice */
quit;
proc contents data=&TN out=a noprint;
/* Use the CONTENTS procedure to retrieve metadata about the dataset specified by TN */
run ;
proc sort data=a ;
by varnum; /* needed to preserve the order of variable list */
proc sql noprint;
select name into: name_list separated by ' "|" ' from a ;
select name into: name separated by ' ' from a ;
/* Retrieve the column names from the dataset and store them in the macro variable name_list */
select count(name) into:n from a ;
/* Count the number of columns in the dataset and store the count in the macro variable n */
select compress(name || case(type) when 1 then "" else "$" end ||
case(FORMAT) when "DATE" then ":DATE9."
when "DATETIME" then ":DATETIME18." else "" end) into: name_listType separated by " " from a ;
select compress( case(type) when 1 then "" else name || compress("$" || put(length,3.)) end ) into: name_Char separated by " " from a ;
select compress( case(FORMAT) when "DATE" then NAME ||":DATE9."
when "DATETIME" then NAME ||":DATETIME18." else "" end) into: name_DATE separated by " " from a ;
/* Create a macro variable name_listType containing column names with a $ suffix for non-numeric columns */
quit;
/* control if needed */
*%put &name_list;
*%put &n;
*%put "0_&TN..sas";
/* create a copie of the table in a sas code file*/;
Filename F "..\..\ActuariatNonVie_M2\SAS\0_&TN..sas";
/* Define a file reference named F with a specific file path */
DATA _null_;
FORMAT &name_date ;
SET &TN end=eof;
/* Start a DATA step with a SET statement to read data from the dataset specified by TN */
FILE F;
/* Specify the file reference F for output */
if _n_=1 then do;
/* Check if the current iteration is the first iteration */
PUT 'data ' "&TN." ' (LABEL="Source CRAN CASdatasets ' "&TN." '");';
PUT "retain " "&name." ";"; /* needed to preserve the order of variable list */
PUT "LENGTH " "&name_char." ";";
put 'infile datalines dsd dlm="|" truncover ;';
/* Write a data step header with a label */
PUT "INPUT &name_listType.;";
PUT "FORMAT " "&name_date." ";";
/* Write an INPUT statement for reading column values */
PUT "DATALINES;" ;
/* Write a DATALINES statement to indicate the start of data lines */
end;
PUT &name_list ;
/* Write the column names to the output file */
IF eof then put ";;;;;;;;;;;;run;";
RUN;
%mend;
/* End of the LCR macro definition */
%LCR(CASdatasets,auscathist);
Please note that the R code `data(package = "PackageName")$results` provides a list of available datasets in a R package.
Then, the following R code allows us to read the SAS file that extracts all databases available in the R package using the previous macro.
write.csv(paste("%LCR(CASdatasets,",as.data.frame(data(package = "CASdatasets")$results)$Item,")"), file=" LCR.sas", row.names = FALSE)
Please note that the command in windows command prompt `type 0_*.sas -> 00_BasesInsuranceFromR.sas` allows us to concatenate all SAS files produced by the macro into a single SAS file named "00_BasesInsuranceFromR.sas".
******* Author : Martial Phélippé-Guinvarc'h;
******* Date 22/06/2021;
******** Obtenir le cahier des charges de chaque appellation viticole en pdf ;
**** Il s'agit en premier lieu de lister les appellations, disponibles sur data.gouv.fr***;
filename aoc "datagouv-listesiqopublies.csv";
proc HTTP
url="https://static.data.gouv.fr/resources/siqo-publies/20210608-095222/2021-06-08-datagouv-listesiqopublies.csv"
method="get"
out=aoc;
run;
proc import datafile=aoc
dbms=csv out=aoc(where =(ID_TYPE_PRODUIT between 2 and 3)keep=appellation ID_TYPE_PRODUIT) replace;
delimiter=";";
;
run;
*** le filtre sur id du produit permet de ne prendre que les cahier des charges viticoles;
proc sort data=aoc nodup;
by Appellation;
run;
**** Une recherche sur Qwant, google ou Yahoo permet;
**** de lister les sites qui fournissent les cahiers des charges;
**** On peut utiliser la proc html mais il est plus simple de faire une data infile;
**** Google permet de cibler les pdf directement mais l'import ne contient pas la liste des résultats;
**** Qwant permet de charger la liste des résultats sans bug;
**** Via une data _null_, j'écris un code SAS d'importation pour chaque appellation;
filename websrap 'webscrap.sas'; * in a permanent file;
data _null_;
set work.aoc end=eof;
file websrap;
* Begin step;
g=quote(compress("https://www.qwant.com/?l=fr&t=web&q=cahier+charges+agriculture+telecharger+le+pdf+aoc+"|| quote(Appellation,"'")));
put "filename x url" ;
put g ";";
put "data websrap(where=(line contains 'div domain='";
put "and line contains 'https://info.agriculture.gouv.fr'));";
put "infile x dsd dlm='<>' lrecl=32767 encoding='utf-8';";
put "input line : $800. @@;";
put "run;";
if _n_ = 1 then
put "data Links; set websrap; run;";
else put "data Links; set Links websrap; run;";
/*if eof then put "; ";*/
*run;
run;
**** parmi tous les sites, le plus fiable est probablement https://info.agriculture.gouv.fr ;
**** se limiter à un seul site, s'il est exhaustif (il affiche une antériorité de 2014), évite de ;
**** télécharger plusieurs fois le même cdc sous des noms différents ;
**** ce site est bien structuré, le lien entre du pdf est presque le même que le lien de la page;
**** il suffit d'ajouter /telechargement;
%include 'webscrap.sas';
data Links;
set Links;
Link=quote(compress(scan(line,2,'"')||"/telechargement"));
run;
*** on supprime les doublons pour ne pas télécharger le même cdc;
proc sort data=Links nodupkey;
by link;
run;
**** Via une data _null_, j'écris de nouveau un code SAS d'importation mais cette fois du pdf;
filename websrapi 'webscrapi.sas'; * in a permanent file;
data _null_;
retain a 0;
set work.Links end=eof;
a+1;
name= quote(compress("AOC" || a ||".pdf"));
file websrapi;
* Begin step;
put "filename pdfs " name ";";
put "proc HTTP url=" link;
put 'method="get" ';
put "out= pdfs ;";
put "run;";
run;
%include 'webscrapi.sas';
***** end;
*************************************************************;
* récupération sous SAS la base de données mise à disposition par Hubeau;
* auteur : Martial Phélippé-Guinvarc'h;
* date : 15/09/2022;
* https://hubeau.eaufrance.fr/;
/*proxy*/
%let PROCHTTP_PROXY=http://localhost:port; * à adapter;
libname Hub "D:\";
*************************************************************;
* import de la liste des stations en json;
filename Hubeau temp ;
proc http
url="https://hubeau.eaufrance.fr/api/v1/hydrometrie/referentiel/stations?format=json&size=9000"
method="GET" out=hubeau;
run;
libname LShub JSON fileref=hubeau;
data ListSite(drop= ordinal_root );
format d_o d_f date9. a best6.;
set LShub.data;
*** convertion des variables caractères ;
array Lib libelle_:;
do over Lib;;
Lib=kcvt(Lib,'utf8','latin1');
end;
*** convertion des dates d ouverture et fermeture;
d_o=input(substr(date_ouverture_station,1,10),YYMMDD10.);
d_f=input(substr(date_fermeture_station,1,10),YYMMDD10.);
if en_service=1 then d_f=today();
a=1;
*** la limite de téléchargement étant 20 000 lignes,
si l'écart entre l'ouverture et la fermeture >20000 alors,
la ligne est dupliquée en période consécutive de 20000 jours;
if d_f-d_o>20000 then do;
output;d_o=d_o+20001;a=2; end;
if d_f-d_o>20000 then do;
output;d_o=d_o+20001;a=3; end;
output;
run;
data ListSite;
set ListSite;
* reconvertion des dates en texte;
date_ouverture_station=put(d_o,YYMMDD10.);
date_fermeture_station=put(min(d_f,d_o+20000),YYMMDD10.);
run;
proc sql;
select count(code_station) into:Nbstation from ListSite;
quit;
************************************************************;
*Initialisation de la data;
Data Hydrometrie;
format grandeur_hydro_elab $3. code_station $12. date_obs_elab $10. resultat_obs_elab Best12.;
set _null_;
run;
*** prefix de l'api;
%let o = %str(https://hubeau.eaufrance.fr/api/v1/hydrometrie/obs_elab?fields=code_station,grandeur_hydro_elab,date_obs_elab,resultat_obs_elab&format=json&size=20000);
%put &o;
%macro importer_tout;
*la macro charge les données par station et par intervalle
max de 20000 j;
%do i=1 %to &Nbstation ;
%put &i.;
filename Hubeau temp;
*** construction du lien entier;
proc sql noprint;
select compress("&o."||"&code_entite=" ||code_station||"&date_debut_obs_elab="||date_fermeture_station)
into:linkstation
from ListSite where ordinal_data=&i;
select code_station into:st
from ListSite where ordinal_data=&i;
quit;
*** téléchargement;
proc http url="&linkstation." method="GET" out=hubeau;
run;
%put &linkstation.;
libname Eaufr JSON fileref=hubeau;
*incrémentation dans la data Hydrometrie;
proc datasets library=EauFr nolist;
append data=data(drop=ordinal_root ordinal_data
where=(code_station="&st.")) base=work.Hydrometrie;
run;
%end;
%mend;
%importer_tout;
proc freq data=work.hydrometrie;
table code_station;
run;
proc sort data=work.hydrometrie out=Hub.hydrometrie;
by code_station date_obs_elab;
run;
/* Program: Extract ZIP File Contents
Run on SAS on Demand https://welcome.oda.sas.com/
Author: Martial Phélippé-Guinvarc'h
Date: January 23, 2025
Description: This program extracts the contents of a specified ZIP file into a designated directory.
*/
/* Define the path to the home directory and the ZIP file name */
%let path=/home/martial.phelippe/;
%let zipf=LWMC1V2;
filename inzip zip "&path.&zipf..zip";/* Reference the ZIP file using the FILENAME statement */
options mprint symbolgen;/* Enable macro debugging options */
/* Step 1: Retrieve the contents of the ZIP file */
data zip_contents;
length memname $200;
fid = dopen("inzip");
if fid = 0 then stop;
memcount = dnum(fid);
do i = 1 to memcount;
memname = dread(fid, i);
call symputx("file"||left(i),memname);
output;
end;
call symputx("Nbfiles", memcount);
rc = dclose(fid);
run;
/* Step 2: Extract files and create directories as needed */
%macro extractzip;
%do l=1 %to &Nbfiles.;
data _null_;
a=find("&&file&l",".")>0; /* the path contains a . when is a file;*/
call symput("filePath",a);
b=scan("&&file&l",-1,"//");
call symputx("dir",b);
c=compress(scan("&&file&l",-1,"/")||"/");
c=%str(tranwrd("&&file&l",trim(c),""));
call symputx("pathto",c);
run;
%put "&path.&&file&l" "&&file&l" &filePath &dir &pathto;
%if %sysevalf(&filePath=1) %then %do; /* Extract file */
data _null_;
filename destfile "&path.&&file&l";
infile inzip("&&file&l") recfm=n;
file destfile recfm=n;
input;
put _infile_;
run;
filename destfile clear;/* Clear the FILENAME reference */
%end;
%else %do; /* Create directory */
%let newdir = %sysfunc(dcreate(&dir, &path.&pathto));
%end;
%end;
%mend;
%extractzip
;
filename inzip clear;
/* Program: Remove completely folder Contents
Run on SAS on Demand https://welcome.oda.sas.com/
Author: Martial Phélippé-Guinvarc'h
Date: January 23, 2025
Description: This program removes the contents of a specified of a designated directory.
Code adapted from ksharp blog proposition
https://communities.sas.com/t5/SAS-Programming/Delete-a-folder-and-all-files-there/td-p/782139
*/
options mprint symbolgen;/* Enable macro debugging options */
%let folder=/home/martial.phelippe/LWMC1V2;
/*Firstly delete all the files under a fold*/
%macro listfile (profondeur) ;
data have;
rc=filename('xx',"&folder");
did=dopen('xx');
d=dnum(did);
put _all_;
do i=1 to dnum(did);
fname=dread(did,i);
rc = filename('filref',catx("&folder",'/',fname));
rc = fdelete('filref');
output;
end;
rc=dclose(did);
run;
%do u=1 %to &profondeur;
data have2;* (where=(find(fname2,".")=0));
set have( keep =fname where=(find(fname,".")=0)) ;
rc=filename('xx',"&folder"||"/"||fname);
did=dopen('xx');
do i=1 to dnum(did);
fname2=dread(did,i);
output;
end;
rc=dclose(did);
run;
data have (keep=fname);
set have have2;
if fname2 ne "" then fname=cats(fname,"/",fname2);
run;
%end;
proc sort data=have nodup;
by descending fname ;
run;
%mend;
%listfile(4)
%macro dfile(dfilename);
filename myfile "&folder/&dfilename";
data _null_;
rc = fdelete('myfile');
if rc = 0 then
put "File deleted successfully.";
else
put "File could not be deleted. RC=" rc;
run;
%mend;
data _null_;
set have end=final;
call execute(cats('%dfile(',fname,');'));
if final then call execute(cats('%dfile();'));
run;