#------------------ EXAMPLE 1 --------------------------------------------------
# count the number of works in CELLAR per year
# @author : Duy Dinh
# @date: 01/06/2015
# Location of the file holding the yearly statistics
input <- "C:/Users/dinhduy/workspace/semantic-web-tools/input/statistics/works_per_year.dat"
# Load the table; the first row of the file provides the column names
data <- read.table(input, header = TRUE)
# Use a single plotting panel
par(mfrow = c(1, 1))
# The Y axis runs from a fixed floor of 1000 up to 100 above the largest total
minY <- 1000
maxY <- max(data$TOTAL + 100)
lineColors <- c('blue')
# Line-and-point plot of the total number of works against the year
plot(
  x = data$year,
  y = data$TOTAL,
  type = "o",
  main = "Total number of works per year in CELLAR",
  xlab = "Year",
  ylab = "Total number of works",
  ylim = c(minY, maxY),
  col = lineColors
)
#------------------ EXAMPLE 2 --------------------------------------------------
# count the number of works in CELLAR per month for a specific year
# @author : Duy Dinh
# @date: 01/06/2015
# input directory containing one data file per plot
input <- "C:/Users/dinhduy/workspace/semantic-web-tools/input/statistics/last5years"
# glob describing the data files of interest
fileFilter <- "*.dat"
# list.files() interprets `pattern` as a regular expression, not as a glob.
# Used directly, "*.dat" is unanchored and its "." matches any character, so
# names such as "mydata.txt" would be selected too. Translate the glob first.
filePattern <- glob2rx(fileFilter)
# full file names
files <- list.files(input, full.names = TRUE, pattern = filePattern)
# short file names, derived from `files` so that both vectors stay aligned
names <- basename(files)
# number of files
n <- length(files)
# distribute plots
par(mfrow = c(2, 3)) # 2 x 3 grid of plots
lineColors <- c('blue')
pchars <- c(22) # a rectangle symbol
marginY <- 1000
months <- c('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
# Find the month (position) at which a particular number of works occurs
# @x: a particular number of works (a single value)
# @array: the set of numbers of works during a year (one entry per month)
# @return: the index of the first entry of array equal to x, or 1 (the first
#          month) when x does not occur in array or array is empty
getMonth <- function (x, array){
  # match() gives the first matching position, or NA when there is none;
  # unlike a 1:length(array) loop it also copes with an empty array
  idx <- match(x, array)
  if (is.na(idx)) {
    # Fall back to the index of the first month. The previous version
    # returned array[1] here, i.e. a number of works instead of a month.
    return (1L)
  }
  idx
}
# for each file, read the data into a table and plot data
# seq_len(n) is empty when no file was found, whereas 1:n would iterate over
# c(1, 0) and fail while reading a non-existent file name
for (i in seq_len(n)){
  # read data into a table
  data = read.table(files[i], header=TRUE)
  maxY = max(data$TOTAL)
  minY = min(data$TOTAL)
  total = sum(data$TOTAL)
  # one panel per file, titled with the file name up to its first "."
  plot(data$month, data$TOTAL, type = "o",
       xlab="Month", ylab="Nr. of works",
       main=strsplit(names[i], ".", fixed=TRUE)[[1]][1],
       col=lineColors,
       pch=pchars,
       ylim=c(minY-marginY, maxY+marginY)
  )
  legend("bottomright", paste("#total:", total), bty="n")
  # compute the months where we observe the min or max number of works.
  # getMonth() returns a row position; it is translated into the value of
  # data$month (the x coordinate used by plot() above) so that the marker
  # stays on the curve even if the rows are not exactly months 1..12 in order
  minX = data$month[getMonth(minY, data$TOTAL)]
  points(minX, minY, col='red', type="o", cex=5)
  maxX = data$month[getMonth(maxY, data$TOTAL)]
  points(maxX, maxY, col='green', type="o", cex=5)
}
#------------------ EXAMPLE 3 : SPARQL + curl over HTTP --------------------------------------------------
# This script runs a SPARQL query, extracts the results in CSV format and then visualizes the data in a plot
# @author: Duy Dinh
# @date: 05/06/2015
# import libraries
library(RCurl)
# end import libraries
# Year whose monthly statistics are requested
year <- 2010
# URL of the SPARQL endpoint (CELLAR PROD)
endpoint <- "http://publications.europa.eu/webapi/rdf/sparql"
# ----- plot setting ----------------
# colour and symbol of the data points, and the padding added to the Y range
pColors <- "blue"
pChars <- 22
marginY <- 100
# ----- end plot setting ---------------
# User defined functions
# Send a SPARQL query to an endpoint over HTTP GET and return the raw response
# @endpoint a SPARQL endpoint (base URL, without query string)
# @query SPARQL query (plain text; it is percent-encoded here)
# @parameters extra HTTP parameters, already in "key=value" form; several
#             parameters may be given as a character vector. May be omitted.
# @return the response body as returned by getURL()
SPARQL <- function(endpoint, query, parameters){
  # Only the query text is percent-encoded. The assembled URL is deliberately
  # not passed through URLencode(reserved = TRUE) again: that call is either
  # skipped as "already encoded" or escapes the ':' and '/' of the endpoint.
  url <- paste0(endpoint, "?query=", URLencode(query, reserved = TRUE))
  # Use the function argument (the previous version silently read the global
  # variable `params` and ignored what the caller passed in)
  if (!missing(parameters) && length(parameters) > 0) {
    url <- paste0(url, "&", paste(parameters, collapse = "&"))
  }
  getURL(url,
         httpheader=c('Accept'="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                      'User-Agent'="Google Chrome Browser"))
}
# --------------------- MAIN PROCEDURE -------------------------------
# SPARQL query: number of distinct works per month for the year of interest
query <- sprintf("
DEFINE input:inference \"cdm_rule_set\"
PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX at: <http://publications.europa.eu/ontology/authority/>
SELECT ?month as ?MONTH COUNT(distinct ?work) as ?TOTAL
WHERE {
?work cdm:date_document ?date_doc .
filter ( year(?date_doc) = %d )
}
GROUP BY (month(?date_doc) AS ?month)
ORDER BY ?month
", year)
# Query parameters: format of the results returned by SPARQL endpoint
params <- "format=text/csv"
content <- SPARQL(endpoint, query, params)
# extract data from CSV results.
# read.table(text = ) opens and closes its own connection; the previous
# textConnection(content) was never closed and leaked a connection
data <- read.table(text = content, header = TRUE, sep = ",")
# find the maximum and minimum number of works during this year
# Find the month (position) at which a particular number of works occurs
# @x: a particular number of works (a single value)
# @array: the set of numbers of works during a year (one entry per month)
# @return: the index of the first entry of array equal to x, or 1 (the first
#          month) when x does not occur in array or array is empty
getMonth <- function (x, array){
  # match() gives the first matching position, or NA when there is none;
  # unlike a 1:length(array) loop it also copes with an empty array
  idx <- match(x, array)
  if (is.na(idx)) {
    # Fall back to the index of the first month. The previous version
    # returned array[1] here, i.e. a number of works instead of a month.
    return (1L)
  }
  idx
}
# global min, max number of works
maxY = max(data$TOTAL)
minY = min(data$TOTAL)
# and the total number of works during this year
total = sum(data$TOTAL)
# plot data (for a two-column data frame, plot() draws the second column
# against the first one)
plot(data, type = "o", main=sprintf("Number of works per month in CELLAR during %d", year),
     ylim=c(minY - marginY, maxY + marginY), pch=pChars, cex=1.5, lwd=2, col=pColors)
# Highlight the minimum (red) and maximum (green).
# getMonth() returns a row position; translate it into the value of the first
# column (the x coordinate used by plot(data)) so that the marker stays on
# the curve even when a month without works is absent from the results.
minX = data[[1L]][getMonth(minY, data$TOTAL)]
points(minX, minY, col='red', type="o", cex=5)
maxX = data[[1L]][getMonth(maxY, data$TOTAL)]
points(maxX, maxY, col='green', type="o", cex=5)
legend("bottomright", paste("#total:", total), bty="n")
#------------------ EXAMPLE 4: while loop --------------------------------------------------
# This script runs a SPARQL query, extracts the results in CSV format and then visualizes the data in a plot
# @author: Duy Dinh
# @date: 05/06/2015
# import libraries
library(RCurl)
# end import libraries
# Year of interest (reassigned further down, before the plotting loop starts)
year <- 2011
# URL of the SPARQL endpoint (CELLAR PROD)
endpoint <- "http://publications.europa.eu/webapi/rdf/sparql"
# ----- plot setting ----------------
# colour and symbol of the data points, and the padding added to the Y range
pColors <- "blue"
pChars <- 22
marginY <- 100
# ----- end plot setting ---------------
# User defined functions
# Send a SPARQL query to an endpoint over HTTP GET and return the raw response
# @endpoint a SPARQL endpoint (base URL, without query string)
# @query SPARQL query (plain text; it is percent-encoded here)
# @parameters extra HTTP parameters, already in "key=value" form; several
#             parameters may be given as a character vector. May be omitted.
# @return the response body as returned by getURL()
SPARQL <- function(endpoint, query, parameters){
  # Only the query text is percent-encoded. The assembled URL is deliberately
  # not passed through URLencode(reserved = TRUE) again: that call is either
  # skipped as "already encoded" or escapes the ':' and '/' of the endpoint.
  url <- paste0(endpoint, "?query=", URLencode(query, reserved = TRUE))
  # Use the function argument (the previous version silently read the global
  # variable `params` and ignored what the caller passed in)
  if (!missing(parameters) && length(parameters) > 0) {
    url <- paste0(url, "&", paste(parameters, collapse = "&"))
  }
  getURL(url,
         httpheader=c('Accept'="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                      'User-Agent'="Google Chrome Browser"))
}
# --------------------- MAIN PROCEDURE -------------------------------
# Today's date, taken from the system clock, and its four-digit year
curDate <- as.Date(Sys.time())
curYear <- as.numeric(format(curDate, format = "%Y"))
# First year to process: five years before the current one
year <- curYear - 5
# Lay the plots out on a 2 x 3 grid
par(mfrow = c(2, 3))
# Find the month (position) at which a particular number of works occurs
# @x: a particular number of works (a single value)
# @array: the set of numbers of works during a year (one entry per month)
# @return: the index of the first entry of array equal to x, or 1 (the first
#          month) when x does not occur in array or array is empty
# Defined once, ahead of the loop, instead of being re-created on every pass.
getMonth <- function (x, array){
  idx <- match(x, array)
  if (is.na(idx)) {
    # index of the first month (the previous version returned array[1],
    # i.e. a number of works instead of a month)
    return (1L)
  }
  idx
}
# Query parameters: format of the results returned by SPARQL endpoint
params <- "format=text/csv"
# one plot per year, from the starting year up to and including curYear
while (year <= curYear) {
  # SPARQL query: number of distinct works per month for the current year
  query <- sprintf("
DEFINE input:inference \"cdm_rule_set\"
PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX at: <http://publications.europa.eu/ontology/authority/>
SELECT ?month as ?MONTH COUNT(distinct ?work) as ?TOTAL
WHERE {
?work cdm:date_document ?date_doc .
filter ( year(?date_doc) = %d )
}
GROUP BY (month(?date_doc) AS ?month)
ORDER BY ?month
", year)
  content <- SPARQL(endpoint, query, params)
  # extract data from CSV results.
  # read.table(text = ) opens and closes its own connection; the previous
  # textConnection(content) leaked one open connection per iteration
  data <- read.table(text = content, header = TRUE, sep = ",")
  # global min, max number of works
  maxY <- max(data$TOTAL)
  minY <- min(data$TOTAL)
  # and the total number of works during this year
  total <- sum(data$TOTAL)
  # plot data (for a two-column data frame, plot() draws the second column
  # against the first one)
  plot(data, type = "o", main=sprintf("CELLAR statistics during %d", year),
       ylim=c(minY - marginY, maxY + marginY), pch=pChars, cex=1.5, lwd=2, col=pColors)
  # Highlight the minimum (red) and maximum (green).
  # getMonth() returns a row position; translate it into the value of the
  # first column (the x coordinate used by plot(data)) so that the marker
  # stays on the curve even when a month without works is absent.
  minX <- data[[1L]][getMonth(minY, data$TOTAL)]
  points(minX, minY, col='red', type="o", cex=5)
  maxX <- data[[1L]][getMonth(maxY, data$TOTAL)]
  points(maxX, maxY, col='green', type="o", cex=5)
  legend("bottomright", paste("#total:", total), bty="n")
  year <- year + 1
}