hydrus/duplicates.html

2593 lines
77 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="description" content="A personal booru-style media tagger that can import files and tags from your hard drive and popular websites. Content can be shared with other users via user-run servers.">
<link rel="canonical" href="https://hydrusnetwork.github.io/hydrus/duplicates.html">
<link rel="prev" href="getting_started_subscriptions.html">
<link rel="next" href="advanced_siblings.html">
<link rel="icon" href="assets/favicon.svg">
<meta name="generator" content="mkdocs-1.6.0, mkdocs-material-9.5.23">
<title>Filtering Duplicates - hydrus network</title>
<link rel="stylesheet" href="assets/stylesheets/main.6543a935.min.css">
<link rel="stylesheet" href="assets/stylesheets/palette.06af60db.min.css">
<link rel="stylesheet" href="assets/stylesheets/extra.css">
<script>__md_scope=new URL(".",location),__md_hash=e=>[...e].reduce((e,_)=>(e<<5)-e+_.charCodeAt(0),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="blue-grey" data-md-color-accent="blue">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#intro" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="index.html" title="hydrus network" class="md-header__button md-logo" aria-label="hydrus network" data-md-component="logo">
<img src="assets/hydrus-white.svg" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
hydrus network
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
Filtering Duplicates
</span>
</div>
</div>
</div>
<form class="md-header__option" data-md-component="palette">
<input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="blue-grey" data-md-color-accent="blue" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0">
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M17 6H7c-3.31 0-6 2.69-6 6s2.69 6 6 6h10c3.31 0 6-2.69 6-6s-2.69-6-6-6zm0 10H7c-2.21 0-4-1.79-4-4s1.79-4 4-4h10c2.21 0 4 1.79 4 4s-1.79 4-4 4zM7 9c-1.66 0-3 1.34-3 3s1.34 3 3 3 3-1.34 3-3-1.34-3-3-3z"/></svg>
</label>
<input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="blue" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_1">
<label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M17 7H7a5 5 0 0 0-5 5 5 5 0 0 0 5 5h10a5 5 0 0 0 5-5 5 5 0 0 0-5-5m0 8a3 3 0 0 1-3-3 3 3 0 0 1 3-3 3 3 0 0 1 3 3 3 3 0 0 1-3 3Z"/></svg>
</label>
</form>
<script>var media,input,key,value,palette=__md_get("__palette");if(palette&&palette.color){"(prefers-color-scheme)"===palette.color.media&&(media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']"),palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent"));for([key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
</label>
<nav class="md-search__options" aria-label="Search">
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
Initializing search
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/hydrusnetwork/hydrus" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.5.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</div>
<div class="md-source__repository">
hydrusnetwork/hydrus
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="index.html" class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="introduction.html" class="md-tabs__link">
Getting Started
</a>
</li>
<li class="md-tabs__item">
<a href="advanced_siblings.html" class="md-tabs__link">
Advanced
</a>
</li>
<li class="md-tabs__item">
<a href="client_api.html" class="md-tabs__link">
API
</a>
</li>
<li class="md-tabs__item">
<a href="faq.html" class="md-tabs__link">
Misc
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="index.html" title="hydrus network" class="md-nav__button md-logo" aria-label="hydrus network" data-md-component="logo">
<img src="assets/hydrus-white.svg" alt="logo">
</a>
hydrus network
</label>
<div class="md-nav__source">
<a href="https://github.com/hydrusnetwork/hydrus" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.5.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</div>
<div class="md-source__repository">
hydrusnetwork/hydrus
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="index.html" class="md-nav__link">
<span class="md-ellipsis">
Home
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
<span class="md-ellipsis">
Getting Started
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
Getting Started
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="introduction.html" class="md-nav__link">
<span class="md-ellipsis">
Introduction and Statement of Principles
</span>
</a>
</li>
<li class="md-nav__item">
<a href="gettingStartedOverview.html" class="md-nav__link">
<span class="md-ellipsis">
Overview For Getting Started
</span>
</a>
</li>
<li class="md-nav__item">
<a href="getting_started_installing.html" class="md-nav__link">
<span class="md-ellipsis">
Installing and Updating
</span>
</a>
</li>
<li class="md-nav__item">
<a href="getting_started_files.html" class="md-nav__link">
<span class="md-ellipsis">
Files
</span>
</a>
</li>
<li class="md-nav__item">
<a href="filetypes.html" class="md-nav__link">
<span class="md-ellipsis">
Supported Filetypes
</span>
</a>
</li>
<li class="md-nav__item">
<a href="getting_started_importing.html" class="md-nav__link">
<span class="md-ellipsis">
Importing and Exporting
</span>
</a>
</li>
<li class="md-nav__item">
<a href="getting_started_tags.html" class="md-nav__link">
<span class="md-ellipsis">
Tags
</span>
</a>
</li>
<li class="md-nav__item">
<a href="getting_started_searching.html" class="md-nav__link">
<span class="md-ellipsis">
Searching and Sorting
</span>
</a>
</li>
<li class="md-nav__item">
<a href="getting_started_more_tags.html" class="md-nav__link">
<span class="md-ellipsis">
More Tags
</span>
</a>
</li>
<li class="md-nav__item">
<a href="getting_started_downloading.html" class="md-nav__link">
<span class="md-ellipsis">
Downloading
</span>
</a>
</li>
<li class="md-nav__item">
<a href="getting_started_ratings.html" class="md-nav__link">
<span class="md-ellipsis">
Ratings
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_12" >
<label class="md-nav__link" for="__nav_2_12" id="__nav_2_12_label" tabindex="">
<span class="md-ellipsis">
PTR
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_2_12_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2_12">
<span class="md-nav__icon md-icon"></span>
PTR
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="access_keys.html" class="md-nav__link">
<span class="md-ellipsis">
PTR Access Keys
</span>
</a>
</li>
<li class="md-nav__item">
<a href="PTR.html" class="md-nav__link">
<span class="md-ellipsis">
PTR Guide
</span>
</a>
</li>
<li class="md-nav__item">
<a href="petitionPractices.html" class="md-nav__link">
<span class="md-ellipsis">
Petition Practices
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_13" checked>
<label class="md-nav__link" for="__nav_2_13" id="__nav_2_13_label" tabindex="">
<span class="md-ellipsis">
Next Steps
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_2_13_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_2_13">
<span class="md-nav__icon md-icon"></span>
Next Steps
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="adding_new_downloaders.html" class="md-nav__link">
<span class="md-ellipsis">
Adding New Downloaders
</span>
</a>
</li>
<li class="md-nav__item">
<a href="getting_started_subscriptions.html" class="md-nav__link">
<span class="md-ellipsis">
Subscriptions
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
Filtering Duplicates
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="duplicates.html" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
Filtering Duplicates
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#duplicates_page" class="md-nav__link">
<span class="md-ellipsis">
the duplicates processing page
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicate_filtering_page" class="md-nav__link">
<span class="md-ellipsis">
the filtering page
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_filter" class="md-nav__link">
<span class="md-ellipsis">
the duplicates filter
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_decisions" class="md-nav__link">
<span class="md-ellipsis">
the decisions to make
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_merging" class="md-nav__link">
<span class="md-ellipsis">
merging metadata
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#future" class="md-nav__link">
<span class="md-ellipsis">
what now?
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#game_cgs" class="md-nav__link">
<span class="md-ellipsis">
game cgs
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_examples" class="md-nav__link">
<span class="md-ellipsis">
more information and examples
</span>
</a>
<nav class="md-nav" aria-label="more information and examples">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#duplicates_examples_better_worse" class="md-nav__link">
<span class="md-ellipsis">
better/worse
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_examples_same" class="md-nav__link">
<span class="md-ellipsis">
same quality duplicates
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_examples_alternates" class="md-nav__link">
<span class="md-ellipsis">
alternates
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_examples_false_positive" class="md-nav__link">
<span class="md-ellipsis">
not related/false positive
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#duplicates_advanced" class="md-nav__link">
<span class="md-ellipsis">
the duplicates system
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<span class="md-ellipsis">
Advanced
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
Advanced
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="advanced_siblings.html" class="md-nav__link">
<span class="md-ellipsis">
Tag Siblings
</span>
</a>
</li>
<li class="md-nav__item">
<a href="advanced_parents.html" class="md-nav__link">
<span class="md-ellipsis">
Tag Parents
</span>
</a>
</li>
<li class="md-nav__item">
<a href="advanced_sidecars.html" class="md-nav__link">
<span class="md-ellipsis">
Sidecars
</span>
</a>
</li>
<li class="md-nav__item">
<a href="advanced_multiple_local_file_services.html" class="md-nav__link">
<span class="md-ellipsis">
Multiple Local File Services
</span>
</a>
</li>
<li class="md-nav__item">
<a href="advanced.html" class="md-nav__link">
<span class="md-ellipsis">
General Clever Tricks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="reducing_lag.html" class="md-nav__link">
<span class="md-ellipsis">
Reducing Lag
</span>
</a>
</li>
<li class="md-nav__item">
<a href="database_migration.html" class="md-nav__link">
<span class="md-ellipsis">
Database Migration
</span>
</a>
</li>
<li class="md-nav__item">
<a href="launch_arguments.html" class="md-nav__link">
<span class="md-ellipsis">
Launch Arguments
</span>
</a>
</li>
<li class="md-nav__item">
<a href="ipfs.html" class="md-nav__link">
<span class="md-ellipsis">
IPFS
</span>
</a>
</li>
<li class="md-nav__item">
<a href="server.html" class="md-nav__link">
<span class="md-ellipsis">
Running Your Own Server
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_11" >
<label class="md-nav__link" for="__nav_3_11" id="__nav_3_11_label" tabindex="0">
<span class="md-ellipsis">
Alternate Installations
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_11_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_11">
<span class="md-nav__icon md-icon"></span>
Alternate Installations
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="docker.html" class="md-nav__link">
<span class="md-ellipsis">
Docker
</span>
</a>
</li>
<li class="md-nav__item">
<a href="wine.html" class="md-nav__link">
<span class="md-ellipsis">
Running In Wine
</span>
</a>
</li>
<li class="md-nav__item">
<a href="running_from_source.html" class="md-nav__link">
<span class="md-ellipsis">
Running From Source
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_12" >
<label class="md-nav__link" for="__nav_3_12" id="__nav_3_12_label" tabindex="0">
<span class="md-ellipsis">
Downloader Creation
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_12_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_12">
<span class="md-nav__icon md-icon"></span>
Downloader Creation
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="downloader_intro.html" class="md-nav__link">
<span class="md-ellipsis">
Introduction
</span>
</a>
</li>
<li class="md-nav__item">
<a href="downloader_gugs.html" class="md-nav__link">
<span class="md-ellipsis">
Gallery URL Generators
</span>
</a>
</li>
<li class="md-nav__item">
<a href="downloader_url_classes.html" class="md-nav__link">
<span class="md-ellipsis">
URL Classes
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_12_4" >
<label class="md-nav__link" for="__nav_3_12_4" id="__nav_3_12_4_label" tabindex="0">
<span class="md-ellipsis">
Parsers
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_3_12_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_12_4">
<span class="md-nav__icon md-icon"></span>
Parsers
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="downloader_parsers.html" class="md-nav__link">
<span class="md-ellipsis">
Overview
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_12_4_2" >
<label class="md-nav__link" for="__nav_3_12_4_2" id="__nav_3_12_4_2_label" tabindex="0">
<span class="md-ellipsis">
Components
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="4" aria-labelledby="__nav_3_12_4_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_12_4_2">
<span class="md-nav__icon md-icon"></span>
Components
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="downloader_parsers_formulae.html" class="md-nav__link">
<span class="md-ellipsis">
Formulae
</span>
</a>
</li>
<li class="md-nav__item">
<a href="downloader_parsers_content_parsers.html" class="md-nav__link">
<span class="md-ellipsis">
Content Parsers
</span>
</a>
</li>
<li class="md-nav__item">
<a href="downloader_parsers_page_parsers.html" class="md-nav__link">
<span class="md-ellipsis">
Page Parsers
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_12_4_3" >
<label class="md-nav__link" for="__nav_3_12_4_3" id="__nav_3_12_4_3_label" tabindex="0">
<span class="md-ellipsis">
Walkthroughs
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="4" aria-labelledby="__nav_3_12_4_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_12_4_3">
<span class="md-nav__icon md-icon"></span>
Walkthroughs
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="downloader_parsers_full_example_gallery_page.html" class="md-nav__link">
<span class="md-ellipsis">
Gallery Page Example
</span>
</a>
</li>
<li class="md-nav__item">
<a href="downloader_parsers_full_example_file_page.html" class="md-nav__link">
<span class="md-ellipsis">
File Page Example
</span>
</a>
</li>
<li class="md-nav__item">
<a href="downloader_parsers_full_example_api.html" class="md-nav__link">
<span class="md-ellipsis">
API Example
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="downloader_completion.html" class="md-nav__link">
<span class="md-ellipsis">
Putting It All Together
</span>
</a>
</li>
<li class="md-nav__item">
<a href="downloader_sharing.html" class="md-nav__link">
<span class="md-ellipsis">
Sharing
</span>
</a>
</li>
<li class="md-nav__item">
<a href="downloader_login.html" class="md-nav__link">
<span class="md-ellipsis">
Login Manager
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
<span class="md-ellipsis">
API
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
<span class="md-nav__icon md-icon"></span>
API
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="client_api.html" class="md-nav__link">
<span class="md-ellipsis">
Client API
</span>
</a>
</li>
<li class="md-nav__item">
<a href="developer_api.html" class="md-nav__link">
<span class="md-ellipsis">
API documentation
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
<span class="md-ellipsis">
Misc
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5">
<span class="md-nav__icon md-icon"></span>
Misc
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="faq.html" class="md-nav__link">
<span class="md-ellipsis">
FAQ
</span>
</a>
</li>
<li class="md-nav__item">
<a href="privacy.html" class="md-nav__link">
<span class="md-ellipsis">
Privacy
</span>
</a>
</li>
<li class="md-nav__item">
<a href="contact.html" class="md-nav__link">
<span class="md-ellipsis">
Contact and Links
</span>
</a>
</li>
<li class="md-nav__item">
<a href="support.html" class="md-nav__link">
<span class="md-ellipsis">
Financial Support
</span>
</a>
</li>
<li class="md-nav__item">
<a href="changelog.html" class="md-nav__link">
<span class="md-ellipsis">
Changelog
</span>
</a>
</li>
<li class="md-nav__item">
<a href="about_docs.html" class="md-nav__link">
<span class="md-ellipsis">
About These Docs
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#duplicates_page" class="md-nav__link">
<span class="md-ellipsis">
the duplicates processing page
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicate_filtering_page" class="md-nav__link">
<span class="md-ellipsis">
the filtering page
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_filter" class="md-nav__link">
<span class="md-ellipsis">
the duplicates filter
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_decisions" class="md-nav__link">
<span class="md-ellipsis">
the decisions to make
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_merging" class="md-nav__link">
<span class="md-ellipsis">
merging metadata
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#future" class="md-nav__link">
<span class="md-ellipsis">
what now?
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#game_cgs" class="md-nav__link">
<span class="md-ellipsis">
game cgs
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_examples" class="md-nav__link">
<span class="md-ellipsis">
more information and examples
</span>
</a>
<nav class="md-nav" aria-label="more information and examples">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#duplicates_examples_better_worse" class="md-nav__link">
<span class="md-ellipsis">
better/worse
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_examples_same" class="md-nav__link">
<span class="md-ellipsis">
same quality duplicates
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_examples_alternates" class="md-nav__link">
<span class="md-ellipsis">
alternates
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#duplicates_examples_false_positive" class="md-nav__link">
<span class="md-ellipsis">
not related/false positive
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#duplicates_advanced" class="md-nav__link">
<span class="md-ellipsis">
the duplicates system
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<a href="https://github.com/hydrusnetwork/hydrus/edit/master/docs/duplicates.md" title="Edit this page" class="md-content__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 20H6V4h7v5h5v3.1l2-2V8l-6-6H6c-1.1 0-2 .9-2 2v16c0 1.1.9 2 2 2h4v-2m10.2-7c.1 0 .3.1.4.2l1.3 1.3c.2.2.2.6 0 .8l-1 1-2.1-2.1 1-1c.1-.1.2-.2.4-.2m0 3.9L14.1 23H12v-2.1l6.1-6.1 2.1 2.1Z"/></svg>
</a>
<h1 id="intro">duplicates<a class="headerlink" href="#intro" title="Permanent link">&para;</a></h1>
<p>As files are shared on the internet, they are often resized, cropped, converted to a different format, altered by the original or a new artist, or turned into a template and reinterpreted over and over and over. Even if you have a very restrictive importing workflow, your client is almost certainly going to get some <strong>duplicates</strong>. Some will be interesting alternate versions that you want to keep, and others will be thumbnails and other low-quality garbage you accidentally imported and would rather delete. Along the way, it would be nice to merge your ratings and tags to the better files so you don't lose any work.</p>
<p>Finding and processing duplicates within a large collection is impossible to do by hand, so I have written a system to do the heavy lifting for you. It currently works on still images, but an extension for gifs and video is planned.</p>
<p>Hydrus finds <em>potential</em> duplicates using a search algorithm that compares images by their shape. Once these pairs of potentials are found, they are presented to you through a filter like the archive/delete filter to determine their exact relationship and if you want to make a further action, such as deleting the 'worse' file of a pair. All of your decisions build up in the database to form logically consistent groups of duplicates and 'alternate' relationships that can be used to infer future information. For instance, if you say that file A is a duplicate of B and B is a duplicate of C, A and C are automatically recognised as duplicates as well.</p>
<p>This all starts on--</p>
<h2 id="duplicates_page">the duplicates processing page<a class="headerlink" href="#duplicates_page" title="Permanent link">&para;</a></h2>
<p>On the normal 'new page' selection window, hit <em>special-&gt;duplicates processing</em>. This will open this page:</p>
<p><img alt="" src="images/dupe_management.png" /></p>
<p>Let's go to the preparation page first:</p>
<p><img alt="" src="images/dupe_preparation.png" /></p>
<p>The 'similar shape' algorithm works on <em>distance</em>. Two files with 0 distance are likely exact matches, such as resizes of the same file or lower/higher quality jpegs, whereas those with distance 4 tend to be to be hairstyle or costume changes. You will be starting on distance 0 and not expect to ever go above 4 or 8 or so. Going too high increases the danger of being overwhelmed by false positives.</p>
<p>If you are interested, the current version of this system uses a 64-bit <a href="https://jenssegers.com/perceptual-image-hashes">phash</a> to represent the image shape and a <a href="https://en.wikipedia.org/wiki/VP-tree">VPTree</a> to search different files' phashes' relative <a href="https://en.wikipedia.org/wiki/Hamming_distance">hamming distance</a>. I expect to extend it in future with multiple phash generation (flips, rotations, and 'interesting' image crops and video frames) and most-common colour comparisons.</p>
<p>Searching for duplicates is fairly fast per file, but with a large client with hundreds of thousands of files, the total CPU time adds up. You can do a little manual searching if you like, but once you are all settled here, I recommend you hit the cog icon on the preparation page and let hydrus do this page's catch-up search work in your regular maintenance time. It'll swiftly catch up and keep you up to date without you even thinking about it.</p>
<p>Start searching on the 'exact match' search distance of 0. It is generally easier and more valuable to get exact duplicates out of the way first.</p>
<p>Once you have some files searched, you should see a potential pair count appear in the 'filtering' page.</p>
<h2 id="duplicate_filtering_page">the filtering page<a class="headerlink" href="#duplicate_filtering_page" title="Permanent link">&para;</a></h2>
<p><em>Processing duplicates can be real trudge-work if you do not set up a workflow you enjoy. It is a little slower than the archive/delete filter, and sometimes takes a bit more cognitive work. For many users, it is a good task to do while listening to a podcast or having a video going on another screen.</em></p>
<p>If you have a client with tens of thousands of files, you will likely have thousands of potential pairs. This can be intimidating, but do not worry--due to the A, B, C logical inferrences as above, you will not have to go through every single one. The more information you put into the system, the faster the number will drop.</p>
<p>The filter has a regular file search interface attached. As you can see, it defaults to <em>system:everything</em>, but you can limit what files you will be working on simply by adding new search predicates. You might like to only work on files in your archive (i.e. that you know you care about to begin with), for instance. You can choose whether both files of the pair should match the search, or just one. 'creator:' tags work very well at cutting the search domain to something more manageable and consistent--try your favourite creator!</p>
<p>If you would like an example from the current search domain, hit the 'show some random potential pairs' button, and it will show two or more files that seem related. It is often interesting and surprising to see what it finds! The action buttons below allow for quick processing of these pairs and groups when convenient (particularly for large cg sets with 100+ alternates), but I recommend you leave these alone until you know the system better.</p>
<p>When you are ready, launch the filter.</p>
<h2 id="duplicates_filter">the duplicates filter<a class="headerlink" href="#duplicates_filter" title="Permanent link">&para;</a></h2>
<p><em>We have not set up your duplicate 'merge' options yet, so do not get too into this. For this first time, just poke around, make some pretend choices, and then cancel out and choose to forget them.</em></p>
<p><img alt="" src="images/dupe_filter.png" /></p>
<p>Like the archive/delete filter, this uses quick mouse-clicks, keyboard shortcuts, or button clicks to action pairs. It presents two files at a time, labelled A and B, which you can quickly switch between just as in the normal media viewer. As soon as you action them, the next pair is shown. The two files will have their current zoom-size locked so they stay the same size (and in the same position) as you switch between them. Scroll your mouse wheel a couple of times and see if any obvious differences stand out.</p>
<p>Please note the hydrus media viewer does not currently work well with large resolutions at high zoom (it gets laggy and may have memory issues). Don't zoom in to 1600% and try to look at jpeg artifact differences on very large files, as this is simply not well supported yet.</p>
<p>The hover window on the right also presents a number of 'comparison statements' to help you make your decision. Green statements mean this current file is probably 'better', and red the opposite. Larger, older, higher-quality, more-tagged files are generally considered better. These statements have scores associated with them (which you can edit in <em>file-&gt;options-&gt;duplicates</em>), and the file of the pair with the highest score is presented first. If the files are duplicates, you can <em>generally</em> assume the first file you see, the 'A', is the better, particularly if there are several green statements.</p>
<p>The filter will need to occasionally checkpoint, saving the decisions so far to the database, before it can fetch the next batch. This allows it to apply inferred information from your current batch and reduce your pending count faster before serving up the next set. It will present you with a quick interstitial 'confirm/back' dialog just to let you know. This happens more often as the potential count decreases.</p>
<h2 id="duplicates_decisions">the decisions to make<a class="headerlink" href="#duplicates_decisions" title="Permanent link">&para;</a></h2>
<p>There are three ways a file can be related to another in the current duplicates system: duplicates, alternates, or false positive (not related).</p>
<p><strong>False positive (not related)</strong> is the easiest. You will not see completely unrelated pairs presented very often in the filter, particularly at low search distances, but if the shape of face and hair and clothing happen to line up (or geometric shapes, often), the search system may make a false positive match. In this case, just click 'they are not related'.</p>
<p><strong>Alternate</strong> relations are files that are not duplicates but obviously related in some way. Perhaps a costume change or a recolour. Hydrus does not have rich alternate support yet (but it is planned, and highly requested), so this relationship is mostly a 'holding area' for files that we will revisit for further processing in the future.</p>
<p><strong>Duplicate</strong> files are of <strong>the exact same thing</strong>. They may be different resolutions, file formats, encoding quality, or one might even have watermark, but they are fundamentally different views on the exact same art. As you can see with the buttons, you can select one file as the 'better' or say they are about the same. If the files are basically the same, there is no point stressing about which is 0.2% better--just click 'they are the same'. For better/worse pairs, you might have reason to keep both, but most of the time I recommend you delete the worse.</p>
<p>You can customise the shortcuts under <em>file-&gt;shortcuts-&gt;duplicate_filter</em>. The defaults are:</p>
<ul>
<li>
<p>Left-click: <strong>this is better, delete the other</strong>.</p>
</li>
<li>
<p>Right-click: <strong>they are related alternates</strong>.</p>
</li>
<li>
<p>Middle-click: <strong>Go back one decision.</strong></p>
</li>
<li>
<p>Enter/Escape: <strong>Stop filtering.</strong></p>
</li>
</ul>
<h2 id="duplicates_merging">merging metadata<a class="headerlink" href="#duplicates_merging" title="Permanent link">&para;</a></h2>
<p>If two duplicates have different metadata like tags or archive status, you probably want to merge them. Cancel out of the filter and click the 'edit default duplicate metadata merge options' button:</p>
<p><img alt="" src="images/dupe_merge_options.png" /></p>
<p>By default, these options are fairly empty. You will have to set up what you want based on your services and preferences. Setting a simple 'copy all tags' is generally a good idea, and like/dislike ratings also often make sense. The settings for better and same quality should probably be similar, but it depends on your situation.</p>
<p>If you choose the 'custom action' in the duplicate filter, you will be presented with a fresh 'edit duplicate merge options' panel for the action you select and can customise the merge specifically for that choice. ('favourite' options will come here in the future!)</p>
<p>Once you are all set up here, you can dive into the duplicate filter. Please let me know how you get on with it!</p>
<h2 id="future">what now?<a class="headerlink" href="#future" title="Permanent link">&para;</a></h2>
<p>The duplicate system is still incomplete. Now the db side is solid, the UI needs to catch up. Future versions will show duplicate information on thumbnails and the media viewer and allow quick-navigation to a file's duplicates and alternates.</p>
<p>For now, if you wish to see a file's duplicates, right-click it and select <em>file relationships</em>. You can review all its current duplicates, open them in a new page, appoint the new 'best file' of a duplicate group, and even mass-action selections of thumbnails.</p>
<p>You can also search for files based on the number of file relations they have (including when setting the search domain of the duplicate filter!) using <em>system:file relationships</em>. You can also search for best/not best files of groups, which makes it easy, for instance, to find all the spare duplicate files if you decide you no longer want to keep them.</p>
<p>I expect future versions of the system to also auto-resolve easy duplicate pairs, such as clearing out pixel-for-pixel png versions of jpgs.</p>
<h2 id="game_cgs">game cgs<a class="headerlink" href="#game_cgs" title="Permanent link">&para;</a></h2>
<p>If you import a lot of game CGs, which frequently have dozens or hundreds of alternates, I recommend you set them as alternates by selecting them all and setting the status through the thumbnail right-click menu. The duplicate filter, being limited to pairs, needs to compare all new members of an alternate group to all other members once to verify they are not duplicates. This is not a big deal for alternates with three or four members, but game CGs provide an overwhelming edge case. Setting a group of thumbnails as alternate 'fixes' their alternate status immediately, discounting the possibility of any internate duplicates, and provides an easy way out of this situation.</p>
<h2 id="duplicates_examples">more information and examples<a class="headerlink" href="#duplicates_examples" title="Permanent link">&para;</a></h2>
<h3 id="duplicates_examples_better_worse">better/worse<a class="headerlink" href="#duplicates_examples_better_worse" title="Permanent link">&para;</a></h3>
<p>Which of two files is better? Here are some common reasons:</p>
<ul>
<li>higher resolution</li>
<li>better image quality</li>
<li>png over jpg for screenshots</li>
<li>jpg over png for busy images</li>
<li>jpg over png for pixel-for-pixel duplicates</li>
<li>a better crop</li>
<li>no watermark or site-frame or undesired blemish</li>
<li>has been tagged by other people, so is likely to be the more 'popular'</li>
</ul>
<p>However these are not hard rules--sometimes a file has a larger resolution or filesize due to a bad upscaling or encoding decision by the person who 'reinterpreted' it. You really have to look at it and decide for yourself.</p>
<p>Here is a good example of a better/worse pair:</p>
<p><a href="images/dupe_better_1.png"><img alt="" src="images/dupe_better_1.png" /></a> <a href="images/dupe_better_2.jpg"><img alt="" src="images/dupe_better_2.jpg" /></a></p>
<p>The first image is better because it is a png (pixel-perfect pngs are always better than jpgs for screenshots of applications--note how obvious the jpg's encoding artifacts are on the flat colour background) and it has a slightly higher (original) resolution, making it less blurry. I presume the second went through some FunnyJunk-tier trash meme site to get automatically cropped to 960px height and converted to the significantly smaller jpeg. Whatever happened, let's drop the second and keep the first.</p>
<p>When both files are jpgs, differences in quality are very common and often significant:</p>
<p><a href="images/dupes_better_sg_a.jpg"><img alt="" src="images/dupes_better_sg_a.jpg" /></a> <a href="images/dupes_better_sg_b.jpg"><img alt="" src="images/dupes_better_sg_b.jpg" /></a></p>
<p>Again, this is mostly due to some online service resizing and lowering quality to ease on their bandwidth costs. There is usually no reason to keep the lower quality version.</p>
<h3 id="duplicates_examples_same">same quality duplicates<a class="headerlink" href="#duplicates_examples_same" title="Permanent link">&para;</a></h3>
<p>When are two files the same quality? A good rule of thumb is if you scroll between them and see no obvious differences, and the comparison statements do not suggest anything significant, just set them as same quality.</p>
<p>Here are two same quality duplicates:</p>
<p><a href="images/dupe_exact_match_1.png"><img alt="" src="images/dupe_exact_match_1.png" /></a> <a href="images/dupe_exact_match_2.png"><img alt="" src="images/dupe_exact_match_2.png" /></a></p>
<p>There is no obvious different between those two. The filesize is significantly different, so I suspect the smaller is a lossless png optimisation, but in the grand scheme of things, that doesn't matter so much. Many of the big content providers--Facebook, Google, Cloudflare--automatically 'optimise' the data that goes through their networks in order to save bandwidth. Although jpegs are often a slaughterhouse, with pngs it is usually harmless.</p>
<p>Given the filesize, you might decide that these are actually a better/worse pair--but if the larger image had tags and was the 'canonical' version on most boorus, the decision might not be so clear. You can choose better/worse and delete one randomly, but sometimes you may just want to keep both without a firm decision on which is best, so just set 'same quality' and move on. Your time is more valuable than a few dozen KB.</p>
<p>Sometimes, you will see pixel-for-pixel duplicate jpegs of very slightly different size, such as 787KB vs 779KB. The smaller of these is usually an exact duplicate that has had its internal metadata (e.g. EXIF tags) stripped by a program or website CDN. They are same quality unless you have a strong opinion on whether having internal metadata in a file is useful.</p>
<h3 id="duplicates_examples_alternates">alternates<a class="headerlink" href="#duplicates_examples_alternates" title="Permanent link">&para;</a></h3>
<p>As I wrote above, hydrus's alternates system in not yet properly ready. It is important to have a basic 'alternates' relationship for now, but it is a holding area until we have a workflow to apply 'WIP'- or 'recolour'-type labels and present that information nicely in the media viewer.</p>
<p>Alternates are not of exactly the same thing, but one is variant of the other or they are both descended from a common original. The precise definition is up to you, but it generally means something like:</p>
<ul>
<li>the files are recolours</li>
<li>the files are alternate versions of the same image produced by the same or different artists (e.g. clean/messy or with/without hair ribbon)</li>
<li>iterations on a close template</li>
<li>different versions of a file's progress, such as the steps from the initial draft sketch to a final shaded version</li>
</ul>
<p>Here are some recolours of the same image:</p>
<p><a href="images/dupe_alternates_recolours.png"><img alt="" src="images/dupe_alternates_recolours.png" /></a></p>
<p>And some WIP:</p>
<p><a href="images/dupe_alternates_progress.png"><img alt="" src="images/dupe_alternates_progress.png" /></a></p>
<p>And a costume change:</p>
<p><a href="images/dupe_alternates_costume.png"><img alt="" src="images/dupe_alternates_costume.png" /></a></p>
<p>None of these are duplicates, but they are obviously related. The duplicate search will notice they are similar, so we should let the client know they are 'alternate'.</p>
<p>Here's a subtler case:</p>
<p><a href="images/dupe_alternate_boxer_a.jpg"><img alt="" src="images/dupe_alternate_boxer_a.jpg" /></a> <a href="images/dupe_alternate_boxer_b.jpg"><img alt="" src="images/dupe_alternate_boxer_b.jpg" /></a></p>
<p>These two files are very similar, but try opening both in separate tabs and then flicking back and forth: the second's glove-string is further into the mouth and has improved chin shading, a more refined eye shape, and shaved pubic hair. It is simple to spot these differences in the client's duplicate filter when you scroll back and forth.</p>
<p>I believe the second is an improvement on the first by the same artist, so it is a WIP alternate. You might also consider it a 'better' improvement.</p>
<p>Here are three files you might or might not consider to be alternates:</p>
<p><a href="images/dupe_alternate_1.jpg"><img alt="" src="images/dupe_alternate_1.jpg" /></a></p>
<p><a href="images/dupe_alternate_2.jpg"><img alt="" src="images/dupe_alternate_2.jpg" /></a></p>
<p><a href="images/dupe_alternate_3.jpg"><img alt="" src="images/dupe_alternate_3.jpg" /></a></p>
<p>These are all based on the same template--which is why the dupe filter found them--but they are not so closely related as those above, and the last one is joking about a different ideology entirely and might deserve to be in its own group. Ultimately, you might prefer just to give them some shared tag and consider them not alternates <em>per se</em>.</p>
<h3 id="duplicates_examples_false_positive">not related/false positive<a class="headerlink" href="#duplicates_examples_false_positive" title="Permanent link">&para;</a></h3>
<p>Here are two files that match false positively:</p>
<p><a href="images/dupe_not_dupes_1.png"><img alt="" src="images/dupe_not_dupes_1.png" /></a></p>
<p><a href="images/dupe_not_dupes_2.jpg"><img alt="" src="images/dupe_not_dupes_2.jpg" /></a></p>
<p>Despite their similar shape, they are neither duplicates nor of even the same topic. The only commonality is the medium. I would not consider them close enough to be alternates--just adding something like 'screenshot' and 'imageboard' as tags to both is probably the closest connection they have.</p>
<p>Recording the 'false positive' relationship is important to make sure the comparison does not come up again in the duplicate filter.</p>
<p>The incidence of false positives increases as you broaden the search distance--the less precise your search, the less likely it is to be correct. At distance 14, these files all match, but uselessly:</p>
<p><a href="images/dupe_garbage.png"><img alt="" src="images/dupe_garbage.png" /></a></p>
<h2 id="duplicates_advanced">the duplicates system<a class="headerlink" href="#duplicates_advanced" title="Permanent link">&para;</a></h2>
<p><em>(advanced nonsense, you can skip this section. tl;dr: duplicate file groups keep track of their best quality file, sometimes called the King)</em></p>
<p>Hydrus achieves duplicate transitivity by treating duplicate files as groups. Although you action pairs, if you set (A duplicate B), that creates a group (A,B). Subsequently setting (B duplicate C) extends the group to be (A,B,C), and so (A duplicate C) is transitively implied.</p>
<p>The first version of the duplicate system attempted to record better/worse/same information for all files in a virtual duplicate group, but this proved very complicated, workflow-heavy, and not particularly useful. The new system instead appoints a single <em>King</em> as the best file of a group. All other files in the group are beneath the King and have no other relationship data retained.</p>
<p><img alt="" src="images/dupe_dupe_group_diagram.png" /></p>
<p>This King represents the group in the duplicate filter (and in potential pairs, which are actually recorded between duplicate media groups--even if most of them at the outset only have one member). If the other file in a pair is considered better, it becomes the new King, but if it is worse or equal, it <a href="images/dupe_simple_merge.png">merges into the other members</a>. When two Kings are compared, <a href="images/dupe_group_merge.png">whole groups can merge</a>!</p>
<p>Alternates are stored in a similar way, except <a href="images/dupe_alternate_group_diagram.png">the members are duplicate groups</a> rather than individual files and they have no significant internal relationship metadata yet. If α, β, and γ are duplicate groups that each have one or more files, then setting (α alt β) and (β alt γ) creates an alternate group (α,β,γ), with the caveat that α and γ will still be sent to the duplicate filter once just to check they are not duplicates by chance. The specific file members of these groups, A, B, C and so on, inherit the relationships of their parent groups when you right-click on their thumbnails.</p>
<p>False positive relationships are stored between pairs of alternate groups, so they apply transitively between all the files of either side's alternate group. If (α alt β) and (ψ alt ω) and you apply (α fp ψ), then (α fp ω), (β fp ψ), and (β fp ω) are all transitively implied.</p>
<details class="example">
<summary>More examples</summary>
<p><a href="images/dupe_whole_system_diagram.png"><img alt="" src="images/dupe_whole_system_diagram.png" title="Some fun." /></a>
<a href="images/dupe_whole_system_simple_diagram.png"><img alt="" src="images/dupe_whole_system_simple_diagram.png" title="And simpler." /></a></p>
</details>
<aside class="md-source-file">
<span class="md-source-file__fact">
<span class="md-icon" title="Last update">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1-2.1-2M12.5 7v5.2l4 2.4-1 1L11 13V7h1.5M11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2v1.8Z"/></svg>
</span>
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">February 28, 2024</span>
</span>
</aside>
</article>
</div>
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var tab,labels=set.querySelector(".tabbed-labels");for(tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12Z"/></svg>
Back to top
</button>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
<div class="md-social">
<a href="https://github.com/hydrusnetwork/hydrus" target="_blank" rel="noopener" title="github.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.5.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</a>
<a href="https://twitter.com/hydrusnetwork" target="_blank" rel="noopener" title="twitter.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.5.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M459.37 151.716c.325 4.548.325 9.097.325 13.645 0 138.72-105.583 298.558-298.558 298.558-59.452 0-114.68-17.219-161.137-47.106 8.447.974 16.568 1.299 25.34 1.299 49.055 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.112-72.772 6.498.974 12.995 1.624 19.818 1.624 9.421 0 18.843-1.3 27.614-3.573-48.081-9.747-84.143-51.98-84.143-102.985v-1.299c13.969 7.797 30.214 12.67 47.431 13.319-28.264-18.843-46.781-51.005-46.781-87.391 0-19.492 5.197-37.36 14.294-52.954 51.655 63.675 129.3 105.258 216.365 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.828 46.782-104.934 104.934-104.934 30.213 0 57.502 12.67 76.67 33.137 23.715-4.548 46.456-13.32 66.599-25.34-7.798 24.366-24.366 44.833-46.132 57.827 21.117-2.273 41.584-8.122 60.426-16.243-14.292 20.791-32.161 39.308-52.628 54.253z"/></svg>
</a>
<a href="https://hydrus.tumblr.com/" target="_blank" rel="noopener" title="hydrus.tumblr.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 320 512"><!--! Font Awesome Free 6.5.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M309.8 480.3c-13.6 14.5-50 31.7-97.4 31.7-120.8 0-147-88.8-147-140.6v-144H17.9c-5.5 0-10-4.5-10-10v-68c0-7.2 4.5-13.6 11.3-16 62-21.8 81.5-76 84.3-117.1.8-11 6.5-16.3 16.1-16.3h70.9c5.5 0 10 4.5 10 10v115.2h83c5.5 0 10 4.4 10 9.9v81.7c0 5.5-4.5 10-10 10h-83.4V360c0 34.2 23.7 53.6 68 35.8 4.8-1.9 9-3.2 12.7-2.2 3.5.9 5.8 3.4 7.4 7.9l22 64.3c1.8 5 3.3 10.6-.4 14.5z"/></svg>
</a>
<a href="mailto:hydrus.admin@gmail.com" target="_blank" rel="noopener" title="" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.5.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M48 64C21.5 64 0 85.5 0 112c0 15.1 7.1 29.3 19.2 38.4l217.6 163.2c11.4 8.5 27 8.5 38.4 0l217.6-163.2c12.1-9.1 19.2-23.3 19.2-38.4 0-26.5-21.5-48-48-48H48zM0 176v208c0 35.3 28.7 64 64 64h384c35.3 0 64-28.7 64-64V176L294.4 339.2a63.9 63.9 0 0 1-76.8 0L0 176z"/></svg>
</a>
<a href="https://discord.gg/wPHPCUZ" target="_blank" rel="noopener" title="discord.gg" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 640 512"><!--! Font Awesome Free 6.5.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M524.531 69.836a1.5 1.5 0 0 0-.764-.7A485.065 485.065 0 0 0 404.081 32.03a1.816 1.816 0 0 0-1.923.91 337.461 337.461 0 0 0-14.9 30.6 447.848 447.848 0 0 0-134.426 0 309.541 309.541 0 0 0-15.135-30.6 1.89 1.89 0 0 0-1.924-.91 483.689 483.689 0 0 0-119.688 37.107 1.712 1.712 0 0 0-.788.676C39.068 183.651 18.186 294.69 28.43 404.354a2.016 2.016 0 0 0 .765 1.375 487.666 487.666 0 0 0 146.825 74.189 1.9 1.9 0 0 0 2.063-.676A348.2 348.2 0 0 0 208.12 430.4a1.86 1.86 0 0 0-1.019-2.588 321.173 321.173 0 0 1-45.868-21.853 1.885 1.885 0 0 1-.185-3.126 251.047 251.047 0 0 0 9.109-7.137 1.819 1.819 0 0 1 1.9-.256c96.229 43.917 200.41 43.917 295.5 0a1.812 1.812 0 0 1 1.924.233 234.533 234.533 0 0 0 9.132 7.16 1.884 1.884 0 0 1-.162 3.126 301.407 301.407 0 0 1-45.89 21.83 1.875 1.875 0 0 0-1 2.611 391.055 391.055 0 0 0 30.014 48.815 1.864 1.864 0 0 0 2.063.7A486.048 486.048 0 0 0 610.7 405.729a1.882 1.882 0 0 0 .765-1.352c12.264-126.783-20.532-236.912-86.934-334.541ZM222.491 337.58c-28.972 0-52.844-26.587-52.844-59.239s23.409-59.241 52.844-59.241c29.665 0 53.306 26.82 52.843 59.239 0 32.654-23.41 59.241-52.843 59.241Zm195.38 0c-28.971 0-52.843-26.587-52.843-59.239s23.409-59.241 52.843-59.241c29.667 0 53.307 26.82 52.844 59.239 0 32.654-23.177 59.241-52.844 59.241Z"/></svg>
</a>
<a href="https://www.patreon.com/hydrus_dev" target="_blank" rel="noopener" title="www.patreon.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.5.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M489.7 153.8c-.1-65.4-51-119-110.7-138.3C304.8-8.5 207-5 136.1 28.4 50.3 68.9 23.3 157.7 22.3 246.2 21.5 319 28.7 510.6 136.9 512c80.3 1 92.3-102.5 129.5-152.3 26.4-35.5 60.5-45.5 102.4-55.9 72-17.8 121.1-74.7 121-150z"/></svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": ".", "features": ["navigation.tracking", "navigation.sections", "navigation.tabs", "content.tabs.link", "navigation.top", "search.suggest", "content.code.annotate", "navigation.instant", "content.action.edit"], "search": "assets/javascripts/workers/search.b8dbb3d2.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
<script src="assets/javascripts/bundle.ebd0bdb7.min.js"></script>
</body>
</html>