DEGREE PROJECT IN COMPUTER SCIENCE AND ENGINEERING, SECOND CYCLE, 30 CREDITS
STOCKHOLM, SWEDEN 2020
Big Data Workflows: DSL-based Specification and Software
Containers for Scalable Execution
YARED DEJENE DESSALK
KTH ROYAL INSTITUTE OF TECHNOLOGY
SCHOOL OF ELECTRICAL ENGINEERING AND COMPUTER SCIENCE
// Two-step example workflow that declares MESSAGE_QUEUE as its communication medium.
// NOTE(review): neither step declares `triggers:`, which the Step rule of the
// grammar listed later in this file makes mandatory - confirm whether this
// listing is a simplified illustration.
workflow workflow1 {
    communicationMedium: medium MESSAGE_QUEUE
    steps:
        // step1: Docker image 'image1', configured with two string parameters.
        - step step1
            implementation: docker-implementation image: 'image1'
            parameters: param1 = '2020-01-01', param2 = '2020-01-31'
        // step2: Docker image 'image2', configured with one integer parameter.
        - step step2
            implementation: docker-implementation image: 'image2'
            parameters: param3 = 50
}
// Prototype workflow: five steps, each started by an external event and each
// implemented by its own Docker image. Every step receives its own name through
// the STEP_NAME environment variable.
//
// Image names are written without embedded spaces: a Docker image reference
// cannot contain whitespace, so 'yareddej/ebw-prototype- 00-unzip' (and the
// similar names below) could not be pulled as previously written.
workflow prototypeWorkflow {
    communicationMedium: medium MESSAGE_QUEUE
    // Workflow-level parameter; `kubemq` is a bare identifier value, not a string.
    parameters: MQ_HOST = kubemq
    steps:
        - step unzip
            triggers: external-event
            implementation: docker-implementation image: 'yareddej/ebw-prototype-00-unzip'
            environment: STEP_NAME='00-unzip'

        - step tsv2csv
            triggers: external-event
            implementation: docker-implementation image: 'yareddej/ebw-prototype-01-tsv2csv'
            environment: STEP_NAME='01-tsv2csv'

        - step split
            triggers: external-event
            implementation: docker-implementation image: 'yareddej/ebw-prototype-02-split'
            environment: STEP_NAME='02-split'

        - step transform
            triggers: external-event
            // NOTE(review): unlike the other four images, this one has no
            // 'yareddej/' repository prefix - confirm whether that is intended.
            implementation: docker-implementation image: 'ebw-prototype-03-transform'
            // NOTE(review): parameter name is spelled 'tranformationJar' (no 's');
            // kept unchanged because the step may read it under this exact name.
            parameters: tranformationJar = '/transformation/transformation.jar'
            environment: STEP_NAME='03-transform'

        - step toarango
            triggers: external-event
            implementation: docker-implementation image: 'yareddej/ebw-prototype-04-toarango'
            // NOTE(review): same spelling caveat as 'tranformationJar' above.
            parameters: tranformationJson = '/transformation/transformation.json'
            environment: STEP_NAME='04-toarango'
}
{
"time":"2020-05-06T22:27:30.134Z", "level":"Error",
"message":"Unable to process file: name.basics.04-aa.csv ... moved to sandbox /sandbox/SANDBOXFILE_1588804050_name.basics.04-aa.csv", "input_file":"name.basics.04-aa.csv",
"file":"/code/fetch_process.sh", "line":"110",
"machine":"6801f8e66f0e", "step":"04-transform"
}
✔ ✔ ✘
✘ ✔ ✘
✔ ✘ ✔
✘ ✔ ✔
✔ ✔ ✔
✘ ✘ ✘
✔ ✘ ✘
✔ ✘ ✔
✔ ✔ ✔
// Xtext grammar of the workflow DSL. Terminals that are used below but not
// declared in this grammar (ID, STRING, INT) come from the reused
// org.eclipse.xtext.common.Terminals grammar.
grammar workflow.WfDsl with org.eclipse.xtext.common.Terminals

// EMF package generated from this grammar.
generate wfDsl "http://www.WfDsl.workflow"

// Ecore is imported so that rules can return Ecore data types (see BOOL_LITERAL).
import "http://www.eclipse.org/emf/2002/Ecore" as ecore

// Root rule: a model is a sequence of zero or more workflows.
WFModel:
    Workflow += Workflow*;
/*
 * A named workflow.
 *
 *   name                 - identifier of the workflow
 *   base                 - optional cross-reference to another Workflow ('extends')
 *   communicationMedium  - mandatory communication medium declaration
 *   params               - optional comma-separated list of input parameters
 *   steps                - one or more steps, each introduced by '-'
 */
Workflow:
    'workflow' name = ID
    ('extends' base = [Workflow])?
    '{'
        'communicationMedium:' communicationMedium = CommunicationMedium
        ('parameters:' params += InputParameter (',' params += InputParameter)*)?
        // A single loop replaces the former nested form
        //   ('-' steps += Step ('-' steps += Step)*)+
        // in which both the inner and the outer loop continued on '-', leaving
        // the parser with an ambiguous decision. The accepted input and the
        // resulting `steps` list are the same.
        'steps:' ('-' steps += Step)+
    '}';
/*
 * A single workflow step. Clauses must appear in the order given here.
 *
 *   name            - identifier of the step
 *   triggers        - mandatory; one or more triggers
 *   implementation  - mandatory; how the step is realised
 *   params          - optional comma-separated input parameters
 *   environment     - optional comma-separated environment variables
 *   previous        - optional; either the keyword 'none' or a comma-separated
 *                     list of cross-references to other steps
 */
Step:
    'step' name = ID
    // The outer '+' additionally lets a further trigger list follow without a comma.
    'triggers:' (triggers += Trigger (',' triggers += Trigger)*)+
    'implementation:' implementation = StepImplementation
    ('parameters:' params += InputParameter (',' params += InputParameter)*)?
    ('environment:' environment += EnvironmentVariable (',' environment += EnvironmentVariable)*)?
    ('previous:' ('none' | previous += [Step] (',' previous += [Step])*))?;
// Communication medium: the keyword 'medium' followed by one literal of the
// CommunicationMediumTypes enum.
CommunicationMedium:
    'medium' type = CommunicationMediumTypes;

// Implementation of a step; DockerImplementation is the only alternative.
StepImplementation:
    DockerImplementation;

// Docker-based implementation: 'docker-implementation image: <string>'.
// NOTE(review): the string after 'image:' is stored in a feature named
// `startTime`, which looks like a copy-paste slip from the trigger rules.
// Renaming it (e.g. to `image`) would change the generated model API, so
// confirm with the code that consumes this feature before changing it.
DockerImplementation:
    'docker-implementation' 'image:' startTime = STRING;
// A trigger is a one-time trigger, a schedule, or an external event.
Trigger:
    OneTimeTrigger | ScheduleTrigger | ExternalEventTrigger;

// 'one-time', optionally followed directly by a start-time string
// (no 'start-time:' keyword here, unlike the schedule rules).
OneTimeTrigger:
    {OneTimeTrigger} 'one-time' (startTime = STRING)?;

// A schedule is either interval-based or cron-based.
ScheduleTrigger:
    IntervalSchedule | CronSchedule;

// Bare keyword 'external-event'; the action forces an object to be created
// even though the rule assigns no feature.
ExternalEventTrigger:
    {ExternalEventTrigger} 'external-event';

// 'interval-schedule' followed by optional clauses in this fixed order:
// frequency (a TimeUnits literal), interval (integer), start-time (string).
IntervalSchedule:
    {IntervalSchedule} 'interval-schedule'
    ('frequency:' frequency = TimeUnits)?
    ('interval:' interval = INT)?
    ('start-time:' startTime = STRING)?;

// 'cron-schedule' followed by optional clauses in this fixed order:
// start-time (string), cron (string).
CronSchedule:
    {CronSchedule} 'cron-schedule'
    ('start-time:' startTime = STRING)?
    ('cron:' cron = STRING)?;
// Common rule over both kinds of parameter. It is not referenced by any other
// rule visible in this grammar.
// NOTE(review): both alternatives can start with `ID '=' STRING`, so the
// choice between them looks ambiguous - confirm whether this rule is needed.
Parameter:
    InputParameter | EnvironmentVariable;

// Either a name with an optional '=' value, or a bare value.
// NOTE(review): a lone ID matches both alternatives (as `name`, or as an
// {Identifier} value) - confirm which reading the generated parser takes.
InputParameter:
    name = ID ('=' value = ParameterValueTypes)?
    | value = ParameterValueTypes;

// NAME='value' - the value must be a string literal.
EnvironmentVariable:
    name = ID '=' value = STRING;

// Value of an input parameter: string, integer, boolean literal or identifier.
// Each alternative creates its own object type through an action.
ParameterValueTypes:
      {StringType} value = STRING
    | {IntType}    value = INT
    | {BoolType}   value = BOOL_LITERAL
    | {Identifier} value = ID;
// Literals accepted after the 'medium' keyword.
enum CommunicationMediumTypes:
      MESSAGE_QUEUE
    | DISTRIBUTED_FILE_SYSTEM
    | WEB_SERVICE;

// Literals accepted after 'frequency:' in an interval schedule.
enum TimeUnits:
      SECOND
    | MINUTE
    | HOUR
    | DAY
    | WEEK
    | MONTH;

// Boolean literal, returned as an Ecore EBoolean.
terminal BOOL_LITERAL returns ecore::EBoolean:
    'true' | 'false';
TRITA-EECS-EX-2020:831
www.kth.se