Improve chapter recognition (#8050)
This commit is contained in:
parent
b5dca2eb09
commit
4b4a138eee
@ -4,35 +4,31 @@ package eu.kanade.tachiyomi.util.chapter
|
|||||||
* -R> = regex conversion.
|
* -R> = regex conversion.
|
||||||
*/
|
*/
|
||||||
object ChapterRecognition {
|
object ChapterRecognition {
|
||||||
|
|
||||||
|
private const val NUMBER_PATTERN = """([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?"""
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* All cases with Ch.xx
|
* All cases with Ch.xx
|
||||||
* Mokushiroku Alice Vol.1 Ch. 4: Misrepresentation -R> 4
|
* Mokushiroku Alice Vol.1 Ch. 4: Misrepresentation -R> 4
|
||||||
*/
|
*/
|
||||||
private val basic = Regex("""(?<=ch\.) *([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")
|
private val basic = Regex("""(?<=ch\.) *$NUMBER_PATTERN""")
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Regex used when only one number occurrence
|
|
||||||
* Example: Bleach 567: Down With Snowwhite -R> 567
|
* Example: Bleach 567: Down With Snowwhite -R> 567
|
||||||
*/
|
*/
|
||||||
private val occurrence = Regex("""([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")
|
private val number = Regex(NUMBER_PATTERN)
|
||||||
|
|
||||||
/**
|
|
||||||
* Regex used when manga title removed
|
|
||||||
* Example: Solanin 028 Vol. 2 -> 028 Vol.2 -> 028Vol.2 -R> 028
|
|
||||||
*/
|
|
||||||
private val withoutManga = Regex("""^([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Regex used to remove unwanted tags
|
* Regex used to remove unwanted tags
|
||||||
* Example Prison School 12 v.1 vol004 version1243 volume64 -R> Prison School 12
|
* Example Prison School 12 v.1 vol004 version1243 volume64 -R> Prison School 12
|
||||||
*/
|
*/
|
||||||
private val unwanted = Regex("""(?<![a-z])(v|ver|vol|version|volume|season|s).?[0-9]+""")
|
private val unwanted = Regex("""\b(?:v|ver|vol|version|volume|season|s)[^a-z]?[0-9]+""")
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Regex used to remove unwanted whitespace
|
* Regex used to remove unwanted whitespace
|
||||||
* Example One Piece 12 special -R> One Piece 12special
|
* Example One Piece 12 special -R> One Piece 12special
|
||||||
*/
|
*/
|
||||||
private val unwantedWhiteSpace = Regex("""(\s)(extra|special|omake)""")
|
private val unwantedWhiteSpace = Regex("""\s(?=extra|special|omake)""")
|
||||||
|
|
||||||
fun parseChapterNumber(mangaTitle: String, chapterName: String, chapterNumber: Float? = null): Float {
|
fun parseChapterNumber(mangaTitle: String, chapterName: String, chapterNumber: Float? = null): Float {
|
||||||
// If chapter number is known return.
|
// If chapter number is known return.
|
||||||
@ -43,40 +39,23 @@ object ChapterRecognition {
|
|||||||
// Get chapter title with lower case
|
// Get chapter title with lower case
|
||||||
var name = chapterName.lowercase()
|
var name = chapterName.lowercase()
|
||||||
|
|
||||||
|
// Remove manga title from chapter title.
|
||||||
|
name = name.replace(mangaTitle.lowercase(), "").trim()
|
||||||
|
|
||||||
// Replace commas and hyphens with dots.
|
// Replace commas and hyphens with dots.
|
||||||
name = name.replace(',', '.').replace('-', '.')
|
name = name.replace(',', '.').replace('-', '.')
|
||||||
|
|
||||||
// Remove unwanted white spaces.
|
// Remove unwanted white spaces.
|
||||||
unwantedWhiteSpace.findAll(name).let {
|
name = unwantedWhiteSpace.replace(name, "")
|
||||||
it.forEach { occurrence -> name = name.replace(occurrence.value, occurrence.value.trim()) }
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove unwanted tags.
|
// Remove unwanted tags.
|
||||||
unwanted.findAll(name).let {
|
name = unwanted.replace(name, "")
|
||||||
it.forEach { occurrence -> name = name.replace(occurrence.value, "") }
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check base case ch.xx
|
// Check base case ch.xx
|
||||||
getChapterNumberFromMatch(basic.find(name))?.let { return it }
|
basic.find(name)?.let { return getChapterNumberFromMatch(it) }
|
||||||
|
|
||||||
// Check one number occurrence.
|
|
||||||
val occurrences: MutableList<MatchResult> = arrayListOf()
|
|
||||||
occurrence.findAll(name).let {
|
|
||||||
it.forEach { occurrence -> occurrences.add(occurrence) }
|
|
||||||
}
|
|
||||||
|
|
||||||
if (occurrences.size == 1) {
|
|
||||||
getChapterNumberFromMatch(occurrences[0])?.let { return it }
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove manga title from chapter title.
|
|
||||||
val nameWithoutManga = name.replace(mangaTitle.lowercase(), "").trim()
|
|
||||||
|
|
||||||
// Check if first value is number after title remove.
|
|
||||||
getChapterNumberFromMatch(withoutManga.find(nameWithoutManga))?.let { return it }
|
|
||||||
|
|
||||||
// Take the first number encountered.
|
// Take the first number encountered.
|
||||||
getChapterNumberFromMatch(occurrence.find(nameWithoutManga))?.let { return it }
|
number.find(name)?.let { return getChapterNumberFromMatch(it) }
|
||||||
|
|
||||||
return chapterNumber ?: -1f
|
return chapterNumber ?: -1f
|
||||||
}
|
}
|
||||||
@ -86,8 +65,8 @@ object ChapterRecognition {
|
|||||||
* @param match result of regex
|
* @param match result of regex
|
||||||
* @return chapter number if found else null
|
* @return chapter number parsed from the match
|
||||||
*/
|
*/
|
||||||
private fun getChapterNumberFromMatch(match: MatchResult?): Float? {
|
private fun getChapterNumberFromMatch(match: MatchResult): Float {
|
||||||
return match?.let {
|
return match.let {
|
||||||
val initial = it.groups[1]?.value?.toFloat()!!
|
val initial = it.groups[1]?.value?.toFloat()!!
|
||||||
val subChapterDecimal = it.groups[2]?.value
|
val subChapterDecimal = it.groups[2]?.value
|
||||||
val subChapterAlpha = it.groups[3]?.value
|
val subChapterAlpha = it.groups[3]?.value
|
||||||
@ -120,11 +99,9 @@ object ChapterRecognition {
|
|||||||
return .97f
|
return .97f
|
||||||
}
|
}
|
||||||
|
|
||||||
return if (alpha[0] == '.') {
|
val trimmedAlpha = alpha.trimStart('.')
|
||||||
// Take value after (.)
|
if (trimmedAlpha.length == 1) {
|
||||||
parseAlphaPostFix(alpha[1])
|
return parseAlphaPostFix(trimmedAlpha[0])
|
||||||
} else {
|
|
||||||
parseAlphaPostFix(alpha[0])
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,6 +112,8 @@ object ChapterRecognition {
|
|||||||
* x.a -> x.1, x.b -> x.2, etc
|
* x.a -> x.1, x.b -> x.2, etc
|
||||||
*/
|
*/
|
||||||
private fun parseAlphaPostFix(alpha: Char): Float {
|
private fun parseAlphaPostFix(alpha: Char): Float {
|
||||||
return ("0." + (alpha.code - 96).toString()).toFloat()
|
val number = alpha.code - ('a'.code - 1)
|
||||||
|
if (number >= 10) return 0f
|
||||||
|
return number / 10f
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -198,7 +198,6 @@ class ChapterRecognitionTest {
|
|||||||
|
|
||||||
assertChapter(mangaTitle, "Fairy Tail 404.extravol002", 404.99f)
|
assertChapter(mangaTitle, "Fairy Tail 404.extravol002", 404.99f)
|
||||||
assertChapter(mangaTitle, "Fairy Tail 404 extravol002", 404.99f)
|
assertChapter(mangaTitle, "Fairy Tail 404 extravol002", 404.99f)
|
||||||
assertChapter(mangaTitle, "Fairy Tail 404.evol002", 404.5f)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -207,7 +206,6 @@ class ChapterRecognitionTest {
|
|||||||
|
|
||||||
assertChapter(mangaTitle, "Fairy Tail 404.omakevol002", 404.98f)
|
assertChapter(mangaTitle, "Fairy Tail 404.omakevol002", 404.98f)
|
||||||
assertChapter(mangaTitle, "Fairy Tail 404 omakevol002", 404.98f)
|
assertChapter(mangaTitle, "Fairy Tail 404 omakevol002", 404.98f)
|
||||||
assertChapter(mangaTitle, "Fairy Tail 404.ovol002", 404.15f)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -216,7 +214,6 @@ class ChapterRecognitionTest {
|
|||||||
|
|
||||||
assertChapter(mangaTitle, "Fairy Tail 404.specialvol002", 404.97f)
|
assertChapter(mangaTitle, "Fairy Tail 404.specialvol002", 404.97f)
|
||||||
assertChapter(mangaTitle, "Fairy Tail 404 specialvol002", 404.97f)
|
assertChapter(mangaTitle, "Fairy Tail 404 specialvol002", 404.97f)
|
||||||
assertChapter(mangaTitle, "Fairy Tail 404.svol002", 404.19f)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -253,6 +250,16 @@ class ChapterRecognitionTest {
|
|||||||
assertChapter("One Outs", "One Outs 001", 1f)
|
assertChapter("One Outs", "One Outs 001", 1f)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `Chapters containing ordinals`() {
|
||||||
|
val mangaTitle = "The Sister of the Woods with a Thousand Young"
|
||||||
|
|
||||||
|
assertChapter(mangaTitle, "The 1st Night", 1f)
|
||||||
|
assertChapter(mangaTitle, "The 2nd Night", 2f)
|
||||||
|
assertChapter(mangaTitle, "The 3rd Night", 3f)
|
||||||
|
assertChapter(mangaTitle, "The 4th Night", 4f)
|
||||||
|
}
|
||||||
|
|
||||||
private fun assertChapter(mangaTitle: String, name: String, expected: Float) {
|
private fun assertChapter(mangaTitle: String, name: String, expected: Float) {
|
||||||
val chapterNumber = ChapterRecognition.parseChapterNumber(mangaTitle, name)
|
val chapterNumber = ChapterRecognition.parseChapterNumber(mangaTitle, name)
|
||||||
assertEquals(chapterNumber, expected)
|
assertEquals(chapterNumber, expected)
|
||||||
|
Loading…
Reference in New Issue
Block a user