Spaces:
No application file
No application file
// Classpath for the build script itself: where Gradle looks up the build plugins
// (coveralls, shadow, test-logger) that are applied further down in this file.
buildscript {
    repositories {
        mavenLocal()
        mavenCentral()
        maven { url 'https://plugins.gradle.org/m2/' }
    }

    dependencies {
        classpath('gradle.plugin.org.kt3k.gradle.plugin:coveralls-gradle-plugin:2.12.0')
        classpath('gradle.plugin.com.github.jengelman.gradle.plugins:shadow:7.0.0')
        classpath('com.adarshr:gradle-test-logger-plugin:2.0.0')
    }
}
// Repositories used to resolve dependencies declared on the root project.
repositories {
    mavenLocal()
    mavenCentral()
}

// JaCoCo is applied on the root project as well; the tool version is pinned here.
apply(plugin: 'jacoco')

jacoco {
    toolVersion = '0.8.8'
}
// Settings shared by the root project and every subproject.
allprojects {
    // Plugins are applied in this exact order.
    ['java-library',
     'base',
     'com.github.kt3k.coveralls',
     'com.adarshr.test-logger'].each { pluginId ->
        apply plugin: pluginId
    }

    group = 'org.grobid'

    tasks.withType(JavaCompile) {
        options.encoding = 'UTF-8'
        // note: the following is not working
        options.compilerArgs.add('-parameters')
    }
}
// Configuration applied to every subproject: publishing, Java level, repositories,
// common dependencies, the sources jar and the test task set-up.
subprojects {
    apply plugin: 'java'
    apply plugin: 'maven-publish'

    // Publish the Java component of each subproject to the local Maven repository.
    publishing {
        publications {
            mavenJava(MavenPublication) {
                from components.java
                //artifact jar
            }
        }
        repositories {
            mavenLocal()
        }
    }

    // Java 11 source and bytecode level.
    sourceCompatibility = JavaVersion.VERSION_11
    targetCompatibility = JavaVersion.VERSION_11

    repositories {
        mavenCentral()
        // jars checked into the repository under grobid-core/localLibs
        maven {
            url new File(rootProject.rootDir, "grobid-core/localLibs")
        }
        maven { url "https://jitpack.io" }
    }

    configurations {
        // keep the log4j 1.x binding and implementation out of every configuration
        all*.exclude group: 'org.slf4j', module: "slf4j-log4j12"
        all*.exclude group: 'log4j', module: "log4j"
        // the jar/shadowJar tasks of the subprojects iterate over resolved classpaths
        implementation.setCanBeResolved(true)
    }

    ext {
        // treating them separately, these jars will be flattened into grobid-core.jar on installing,
        // to avoid missing dependencies from the projects that include grobid-core (see 'jar' task in grobid-core)
        localLibs = ['crfpp-1.0.2.jar',
                     'langdetect-1.1-20120112.jar',
                     'wipo-analysers-0.0.2.jar',
                     'imageio-pnm-1.0.jar',
                     'wapiti-1.5.0.jar']
    }

    dependencies {
        // packaging local libs inside grobid-core.jar
        implementation fileTree(dir: new File(rootProject.rootDir, 'grobid-core/localLibs'), include: localLibs)

        // tests: JUnit 5 platform with the vintage engine for JUnit 4 style tests
        testRuntimeOnly 'org.junit.vintage:junit-vintage-engine:5.9.3'
        testImplementation(platform('org.junit:junit-bom:5.9.3'))
        testImplementation('org.junit.jupiter:junit-jupiter')
        testImplementation 'org.easymock:easymock:5.1.0'
        testImplementation "org.powermock:powermock-api-easymock:2.0.7"
        testImplementation "org.powermock:powermock-module-junit4:2.0.7"
        testImplementation "xmlunit:xmlunit:1.6"
        testImplementation "org.hamcrest:hamcrest-all:1.3"

        implementation "com.cybozu.labs:langdetect:1.1-20120112"
        implementation "com.rockymadden.stringmetric:stringmetric-core_2.11:0.27.4"
        implementation "commons-pool:commons-pool:1.6"
        implementation "commons-io:commons-io:2.5"
        implementation "org.apache.commons:commons-lang3:3.6"
        implementation "org.apache.commons:commons-collections4:4.1"
        implementation 'org.apache.commons:commons-text:1.11.0'
        implementation "commons-dbutils:commons-dbutils:1.7"
        implementation "com.google.guava:guava:31.0.1-jre"
        implementation "org.apache.httpcomponents:httpclient:4.5.3"
        implementation "black.ninia:jep:4.0.2"
        implementation "com.fasterxml.jackson.core:jackson-core:2.14.3"
        implementation "com.fasterxml.jackson.core:jackson-databind:2.14.3"
        implementation "com.fasterxml.jackson.module:jackson-module-afterburner:2.14.3"
        implementation "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.14.3"
    }

    task sourceJar(type: Jar) {
        description = 'A jar that contains source code'
        archiveClassifier = 'sources'
        from project.sourceSets.main.java
    }

    artifacts {
        archives sourceJar
        archives jar
    }

    //compileJava.dependsOn(changeVersionIfNeeded)

    // uploadArchives {
    //     // if you want to enable uploading to some maven repo, add those properties to ~/.gradle/gradle.properties, e.g.:
    //     /*
    //         mavenRepoUserName=maven_username
    //         mavenRepoPassword=super_secret
    //         mavenRepoReleasesUrl=https://nexus3.example.org/repository/maven-releases/
    //         mavenRepoSnapshotsUrl=https://nexus3.example.org/repository/maven-snapshots/
    //     */
    //     def user = project.hasProperty('mavenRepoUserName') ? project.findProperty('mavenRepoUserName') : ''
    //     def password = project.hasProperty('mavenRepoPassword') ? project.findProperty('mavenRepoPassword') : ''
    //     def rurl = project.hasProperty('mavenRepoReleasesUrl') ? project.findProperty('mavenRepoReleasesUrl') : ''
    //     def surl = project.hasProperty('mavenRepoSnapshotsUrl') ? project.findProperty('mavenRepoSnapshotsUrl') : ''
    //
    //     repositories.mavenDeployer {
    //         repository(url: rurl) {
    //             authentication(userName: user, password: password)
    //         }
    //         snapshotRepository(url: surl) {
    //             authentication(userName: user, password: password)
    //         }
    //
    //     }
    // }

    test {
        useJUnitPlatform()
        testLogging.showStandardStreams = true
        // enable for having separate test executor for different tests
        forkEvery = 1
        maxHeapSize = "1024m"

        // The native libraries live in <root>/grobid-home/lib. They are resolved against the
        // root project on every platform: file() would resolve against the directory of the
        // subproject being configured, which does not contain grobid-home.
        def grobidLibDir = new File(rootProject.rootDir, "grobid-home/lib")
        def libraries = ""
        if (Os.isFamily(Os.FAMILY_MAC)) {
            def macLibDir = Os.OS_ARCH.equals("aarch64") ? "mac_arm-64" : "mac-64"
            libraries = new File(grobidLibDir, macLibDir).absolutePath
        } else if (Os.isFamily(Os.FAMILY_UNIX)) {
            libraries = new File(grobidLibDir, "lin-64/jep").absolutePath +
                    ":" + new File(grobidLibDir, "lin-64").absolutePath
        } else {
            throw new RuntimeException("Unsupported platform!")
        }

        // --add-opens is only passed to JVMs newer than Java 8
        if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
            jvmArgs "--add-opens", "java.base/java.util.stream=ALL-UNNAMED",
                    "--add-opens", "java.base/java.io=ALL-UNNAMED", "--add-opens", "java.xml/jdk.xml.internal=ALL-UNNAMED"
        }

        // append the native library directories to the library path of the test JVM
        systemProperty "java.library.path", "${System.getProperty('java.library.path')}:" + libraries
    }
}
/** SUBPROJECTS **/ | |
// grobid-core: builds the regular jar (with the local libs unpacked into it) and a
// shaded 'onejar' whose Lucene packages are relocated.
project("grobid-core") {
    apply plugin: 'com.github.johnrengelman.shadow'
    apply plugin: 'jacoco'

    configurations {
        shadedLib
    }

    dependencies {
        implementation(group: 'xml-apis', name: 'xml-apis') {
            // otherwise xml-apis 2.0.1 will come from XOM and will result in
            // java.lang.ClassNotFoundException: org.w3c.dom.ElementTraversal
            //TODO: sort out this problem better
            version {
                strictly '1.4.01'
            }
        }

        // Logs
        implementation 'org.slf4j:slf4j-api:1.7.30'
        implementation 'ch.qos.logback:logback-classic:1.2.3'

        implementation 'org.apache.pdfbox:pdfbox:2.0.18'

        api 'xerces:xercesImpl:2.12.0'
        api 'net.arnx:jsonic:1.3.10'
        api 'net.sf.saxon:Saxon-HE:9.6.0-9'
        api 'xom:xom:1.3.2'
        api 'javax.xml.bind:jaxb-api:2.3.0'

        implementation 'joda-time:joda-time:2.9.9'
        implementation 'org.apache.lucene:lucene-analyzers-common:4.5.1'
        implementation 'black.ninia:jep:4.0.2'
        implementation 'org.apache.opennlp:opennlp-tools:1.9.1'
        implementation group: 'org.jruby', name: 'jruby-complete', version: '9.2.13.0'

        shadedLib 'org.apache.lucene:lucene-analyzers-common:4.5.1'
    }

    // Lazily evaluated copy source shared by 'jar' and 'shadowJar': for each runtime
    // classpath entry that is a file named in localLibs, the unpacked content of that jar;
    // every other entry contributes an empty list.
    def unpackedLocalLibs = {
        project.configurations.runtimeClasspath.collect { File entry ->
            (!entry.isDirectory() && localLibs.contains(entry.getName())) ? zipTree(entry) : []
        }
    }

    jar {
        from unpackedLocalLibs
        exclude('logback.xml')
        duplicatesStrategy = DuplicatesStrategy.EXCLUDE
    }

    shadowJar {
        archiveClassifier = 'onejar'
        mergeServiceFiles()
        zip64 true
        manifest {
            attributes 'Main-Class': 'org.grobid.core.main.batch.GrobidMain'
        }
        from sourceSets.main.output
        from unpackedLocalLibs
        configurations = [project.configurations.shadedLib, project.configurations.runtimeClasspath]
        relocate 'org.apache.lucene', 'org.grobid.shaded.org.apache.lucene'
    }

    artifacts {
        archives jar
        archives shadowJar
    }

    // Replace the literal text 'project.version' in grobid-version.txt with the project version.
    processResources {
        filesMatching('grobid-version.txt') {
            filter { line ->
                line.replace('project.version', project.property('version'))
            }
        }
    }

    // 'install' publishes to the local Maven repository and builds the onejar.
    task install {
        dependsOn publishToMavenLocal, 'shadowJar'
    }
}
// grobid-home: packages the listed resource directories into a zip, rooted at 'grobid-home'.
project("grobid-home") {
    task packageGrobidHome(type: Zip) {
        zip64 true
        from('.') {
            include 'config/*',
                    'language-detection/**',
                    'sentence-segmentation/**',
                    'lib/**',
                    'pdfalto/**',
                    'models/**',
                    'lexicon/**',
                    'schemas/**',
                    'scripts/**'
            // model files with the .old extension are left out of the package
            exclude 'models/**/*.old'
        }
        into('grobid-home')
    }

    artifacts {
        archives packageGrobidHome
    }
}
import org.apache.tools.ant.taskdefs.condition.Os | |
// grobid-service: the runnable service application, its 'run' task, its onejar
// and its zip distribution.
project(":grobid-service") {
    apply plugin: 'application'
    apply plugin: 'jacoco'
    apply plugin: 'com.github.johnrengelman.shadow'

    mainClassName = 'org.grobid.service.main.GrobidServiceApplication'

    tasks.run {
        // Native library directories, relative to this subproject's directory.
        def libraries = ""
        if (Os.isFamily(Os.FAMILY_MAC)) {
            if (Os.OS_ARCH.equals("aarch64")) {
                libraries = "${file("../grobid-home/lib/mac_arm-64").absolutePath}"
            } else {
                libraries = "${file("../grobid-home/lib/mac-64").absolutePath}"
            }
        } else if (Os.isFamily(Os.FAMILY_UNIX)) {
            libraries = "${file("../grobid-home/lib/lin-64/jep").absolutePath}:" +
                    "${file("../grobid-home/lib/lin-64").absolutePath}:"
        } else {
            throw new RuntimeException("Unsupported platform!")
        }

        // --add-opens is only passed to JVMs newer than Java 8
        if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
            jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED"
        }

        // the service is started from the repository root
        workingDir = rootProject.rootDir

        def javaLibraryPath = "${System.getProperty('java.library.path')}:" + libraries

//        if (System.env.CONDA_PREFIX) {
//            def condaEnv = "${System.env.CONDA_PREFIX}/lib"
//            def pythonDirectory = file(condaEnv).listFiles({ it.toString().contains("/lib/python") } as FileFilter)?.first()
//            def pythonVersion = (pythonDirectory =~ /python([0-9]\.[0-9]+)/)[0][1]
//
//            javaLibraryPath = "${System.getProperty('java.library.path')}:" +
//                libraries + ":" +
//                "${System.env.CONDA_PREFIX}/lib:" +
//                "${System.env.CONDA_PREFIX}/lib/python${pythonVersion}/site-packages/jep"
//        }

        systemProperty "java.library.path", javaLibraryPath
    }

    // Only the plain zip distribution is produced.
    tasks.distZip.enabled = true
    tasks.distTar.enabled = false
    //tasks.distZip.zip64 = true
    tasks.shadowDistZip.enabled = false
    tasks.shadowDistTar.enabled = false

    distZip { duplicatesStrategy = DuplicatesStrategy.EXCLUDE }
    distTar { duplicatesStrategy = DuplicatesStrategy.EXCLUDE }

    dependencies {
        implementation project(':grobid-core')
        implementation project(':grobid-trainer')

        //Dropwizard
        implementation 'ru.vyarus:dropwizard-guicey:7.0.0'
        implementation 'io.dropwizard:dropwizard-bom:4.0.0'
        implementation 'io.dropwizard:dropwizard-core:4.0.0'
        implementation 'io.dropwizard:dropwizard-assets:4.0.0'
        implementation 'io.dropwizard:dropwizard-testing:4.0.0'
        implementation 'io.dropwizard.modules:dropwizard-testing-junit4:4.0.0'
        implementation 'io.dropwizard:dropwizard-forms:4.0.0'
        implementation 'io.dropwizard:dropwizard-client:4.0.0'
        implementation 'io.dropwizard:dropwizard-auth:4.0.0'
        implementation 'io.dropwizard.metrics:metrics-core:4.2.22'
        implementation 'io.dropwizard.metrics:metrics-servlets:4.2.22'

        // NOTE(review): grobid-core declares pdfbox 2.0.18 - consider aligning the two versions
        implementation "org.apache.pdfbox:pdfbox:2.0.3"
        implementation "javax.activation:activation:1.1.1"
        implementation "io.prometheus:simpleclient_dropwizard:0.16.0"
        implementation "io.prometheus:simpleclient_servlet:0.16.0"
    }

    shadowJar {
        archiveClassifier = 'onejar'
        mergeServiceFiles()
        zip64 true
        manifest {
            // NOTE(review): this differs from mainClassName above - confirm it is intended
            attributes 'Main-Class': 'org.grobid.core.main.batch.GrobidMain'
        }
        exclude("logback.xml")
        duplicatesStrategy = DuplicatesStrategy.EXCLUDE
    }

    artifacts {
        archives shadowJar
    }

    distributions {
        main {
            contents {
                //from(new File(rootProject.rootDir, "grobid-service/README.md")) {
                //    into "doc"
                //}

                // grobid-home sits directly under the root project directory, like every other
                // rootProject.rootDir based path in this build; a leading "../" would point
                // outside the repository and the configuration file would not be packaged.
                from(new File(rootProject.rootDir, "grobid-home/config/grobid.yaml")) {
                    into "config"
                }
                // NOTE(review): java.io.File does not expand "*", so this source presumably
                // matches nothing - confirm whether it can be removed.
                from(new File(rootProject.rootDir, "grobid-service/build/scripts/*")) {
                    into "bin"
                }
            }
        }
    }
}
// grobid-trainer: jar/onejar packaging plus one JavaExec task per model to train
// and the end-to-end evaluation tasks.
project(":grobid-trainer") {
    apply plugin: 'com.github.johnrengelman.shadow'
    apply plugin: 'jacoco'

    dependencies {
        implementation(group: 'xml-apis', name: 'xml-apis') {
            // otherwise xml-apis 2.0.1 will come from XOM and will result in
            // java.lang.ClassNotFoundException: org.w3c.dom.ElementTraversal
            //TODO: sort out this problem better
            version {
                strictly '1.4.01'
            }
        }

        implementation project(':grobid-core')
        implementation "com.rockymadden.stringmetric:stringmetric-core_2.10:0.27.3"
        implementation "me.tongfei:progressbar:0.9.0"

        // logs
        implementation 'org.slf4j:slf4j-api:1.7.30'
        implementation 'ch.qos.logback:logback-classic:1.2.3'
    }

    jar {
        // unpack the jars listed in localLibs into this jar
        from {
            project.configurations.runtimeClasspath.collect {
                it.isDirectory() ? [] : localLibs.contains(it.getName()) ? zipTree(it) : []
            }
        }
        exclude("logback.xml")
        duplicatesStrategy = DuplicatesStrategy.EXCLUDE
    }

    shadowJar {
        archiveClassifier = 'onejar'
        mergeServiceFiles()
        zip64 true
        manifest {
            attributes 'Main-Class': 'org.grobid.trainer.TrainerRunner'
        }
        from('src/main/resources') {
            include '*.xml'
        }
        duplicatesStrategy = DuplicatesStrategy.EXCLUDE
    }

    artifacts {
        archives shadowJar
        archives jar
    }

    // 'install' publishes to the local Maven repository and builds the onejar.
    task install {
        dependsOn publishToMavenLocal
        dependsOn 'shadowJar'
    }

    // task name -> main class of the trainer it runs
    def trainerTasks = [
            "train_name_header"             : "org.grobid.trainer.NameHeaderTrainer",
            "train_name_citation"           : "org.grobid.trainer.NameCitationTrainer",
            "train_affiliation_address"     : "org.grobid.trainer.AffiliationAddressTrainer",
            "train_header"                  : "org.grobid.trainer.HeaderTrainer",
            "train_fulltext"                : "org.grobid.trainer.FulltextTrainer",
            "train_shorttext"               : "org.grobid.trainer.ShorttextTrainer",
            "train_figure"                  : "org.grobid.trainer.FigureTrainer",
            "train_table"                   : "org.grobid.trainer.TableTrainer",
            "train_citation"                : "org.grobid.trainer.CitationTrainer",
            "train_date"                    : "org.grobid.trainer.DateTrainer",
            "train_segmentation"            : "org.grobid.trainer.SegmentationTrainer",
            "train_reference_segmentation"  : "org.grobid.trainer.ReferenceSegmenterTrainer",
            "train_ebook_model"             : "org.grobid.trainer.EbookTrainer",
            "train_patent_citation"         : "org.grobid.trainer.PatentParserTrainer",
            "train_funding_acknowledgement" : "org.grobid.trainer.FundingAcknowledgementTrainer"
    ]

    // Native library directories, relative to this subproject's directory.
    def libraries = ""
    if (Os.isFamily(Os.FAMILY_MAC)) {
        if (Os.OS_ARCH.equals("aarch64")) {
            libraries = "${file("../grobid-home/lib/mac_arm-64").absolutePath}"
        } else {
            libraries = "${file("../grobid-home/lib/mac-64").absolutePath}"
        }
    } else if (Os.isFamily(Os.FAMILY_UNIX)) {
        libraries = "${file("../grobid-home/lib/lin-64/jep").absolutePath}:" +
                "${file("../grobid-home/lib/lin-64").absolutePath}:"
    } else {
        throw new RuntimeException("Unsupported platform!")
    }

    // JVM settings shared by every training and evaluation task below. The heap size is
    // set on every JVM; --add-opens is only added on JVMs newer than Java 8.
    def trainerJvmArgs = ['-Xmx3072m']
    if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
        trainerJvmArgs += ["--add-opens", "java.base/java.lang=ALL-UNNAMED"]
    }
    def javaLibraryPath = "${System.getProperty('java.library.path')}:" + libraries

    // one JavaExec task per entry of trainerTasks
    trainerTasks.each { taskName, trainerClassName ->
        tasks.create(name: taskName, type: JavaExec, group: 'modeltraining') {
            main = trainerClassName
            classpath = sourceSets.main.runtimeClasspath
            jvmArgs trainerJvmArgs
            systemProperty "java.library.path", javaLibraryPath
        }
    }

    // evaluation tasks
    // Returns the project property 'propName' when it is set (e.g. via -PpropName=...),
    // otherwise 'defaultVal'.
    ext.getArg = { propName, defaultVal ->
        return project.hasProperty(propName) ? project.getProperty(propName) : defaultVal;
    }

    // run like this:
    // ./gradlew jatsEval -Pp2t=/path/to/goldenSet
    // ./gradlew jatsEval -Pp2t=/path/to/goldenSet -Prun=1 -PfileRatio=0.1
    // ./gradlew teiEval -Pp2t=/path/to/goldenSet
    // ./gradlew PrepareDOIMatching -Pp2t=ABS_PATH_TO_PMC/PMC_sample_1943
    // ./gradlew EvaluateDOIMatching -Pp2t=ABS_PATH_TO_PMC/PMC_sample_1943
    task(jatsEval, dependsOn: 'classes', type: JavaExec, group: 'modelevaluation') {
        main = 'org.grobid.trainer.evaluation.EndToEndEvaluation'
        classpath = sourceSets.main.runtimeClasspath
        args 'nlm', getArg('p2t', '.'), getArg('run', '0'), getArg('fileRatio', '1.0')
        jvmArgs trainerJvmArgs
        systemProperty "java.library.path", javaLibraryPath
    }

    task(teiEval, dependsOn: 'classes', type: JavaExec, group: 'modelevaluation') {
        main = 'org.grobid.trainer.evaluation.EndToEndEvaluation'
        classpath = sourceSets.main.runtimeClasspath
        args 'tei', getArg('p2t', '.'), getArg('run', '0'), getArg('fileRatio', '1.0')
        jvmArgs trainerJvmArgs
        systemProperty "java.library.path", javaLibraryPath
    }

    task(PrepareDOIMatching, dependsOn: 'classes', type: JavaExec, group: 'modelevaluation') {
        main = 'org.grobid.trainer.evaluation.EvaluationDOIMatching'
        classpath = sourceSets.main.runtimeClasspath
        args 'data', getArg('p2t', '.')
        jvmArgs trainerJvmArgs
        systemProperty "java.library.path", javaLibraryPath
    }

    task(EvaluateDOIMatching, dependsOn: 'classes', type: JavaExec, group: 'modelevaluation') {
        main = 'org.grobid.trainer.evaluation.EvaluationDOIMatching'
        classpath = sourceSets.main.runtimeClasspath
        args 'eval', getArg('p2t', '.')
        jvmArgs trainerJvmArgs
        systemProperty "java.library.path", javaLibraryPath
    }
}
/** JACOCO **/ | |
// Aggregated JaCoCo report over all subprojects that apply the 'jacoco' plugin.
tasks.register("codeCoverageReport", JacocoReport) {
    // If a subproject applies the 'jacoco' plugin, add its result to the report
    subprojects { subproject ->
        subproject.plugins.withType(JacocoPlugin).configureEach {
            // every task carrying a JacocoTaskExtension contributes its sources and execution data
            subproject.tasks.matching({ t -> t.extensions.findByType(JacocoTaskExtension) }).configureEach { testTask ->
                sourceSets subproject.sourceSets.main
                executionData(testTask)
            }

            // To automatically run `test` every time `./gradlew codeCoverageReport` is called,
            // you may want to set up a task dependency between them as shown below.
            // Note that this requires the `test` tasks to be resolved eagerly (see `forEach`) which
            // may have a negative effect on the configuration time of your build.
            subproject.tasks.matching({ t -> t.extensions.findByType(JacocoTaskExtension) }).forEach {
                rootProject.tasks.codeCoverageReport.dependsOn(it)
            }
        }
    }

    // XML -> coveralls,
    // HTML -> for manual check
    // 'required' is used instead of the deprecated 'enabled' report property.
    reports {
        xml.required = true
        html.required = true
        csv.required = true
    }
}
/** COVERALLS **/ | |
// Coveralls reads the sources from the source directories of all subprojects.
coveralls {
    sourceDirs = files(subprojects.sourceSets.main.allSource.srcDirs).files.absolutePath
}

// The aggregated coverage report has to exist before it is uploaded.
tasks.coveralls.dependsOn(codeCoverageReport)

wrapper {
    gradleVersion = '7.2'
}

// Building the root project builds every subproject.
build.dependsOn(subprojects.collect { sub -> sub.tasks.getByName('build') })