app: improve file magic matching
Change file magic matching from a simple boolean "magic matches" logic to a match quality. The quality is measured by the number of bytes that matched. Matching a single file procedure's magics now tries all of its magics and returns the best match quality. Searching for a file procedure for a given file now tries all file procedures and returns the one with the best match quality. This fixes raw camera files being opened as TIFF, provided that a better magic than just the generic TIFF magic is supplied.
This commit is contained in:
@ -40,9 +40,10 @@
|
|||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
FILE_MATCH_NONE,
|
/* positive values indicate the length of a matching magic */
|
||||||
FILE_MATCH_MAGIC,
|
|
||||||
FILE_MATCH_SIZE
|
FILE_MATCH_NONE = 0,
|
||||||
|
FILE_MATCH_SIZE = -1
|
||||||
} FileMatchType;
|
} FileMatchType;
|
||||||
|
|
||||||
|
|
||||||
@ -121,6 +122,8 @@ file_procedure_find (GSList *procs,
|
|||||||
gboolean opened = FALSE;
|
gboolean opened = FALSE;
|
||||||
gsize head_size = 0;
|
gsize head_size = 0;
|
||||||
guchar head[256];
|
guchar head[256];
|
||||||
|
FileMatchType best_match_val = FILE_MATCH_NONE;
|
||||||
|
GimpPlugInProcedure *best_file_proc = NULL;
|
||||||
|
|
||||||
for (list = procs; list; list = g_slist_next (list))
|
for (list = procs; list; list = g_slist_next (list))
|
||||||
{
|
{
|
||||||
@ -172,24 +175,28 @@ file_procedure_find (GSList *procs,
|
|||||||
}
|
}
|
||||||
else if (match_val != FILE_MATCH_NONE)
|
else if (match_val != FILE_MATCH_NONE)
|
||||||
{
|
{
|
||||||
g_object_unref (input);
|
g_printerr ("magic match %d on %s\n",
|
||||||
|
match_val,
|
||||||
|
gimp_object_get_name (file_proc));
|
||||||
|
|
||||||
return file_proc;
|
if (match_val > best_match_val)
|
||||||
|
{
|
||||||
|
best_match_val = match_val;
|
||||||
|
best_file_proc = file_proc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (input)
|
if (input)
|
||||||
{
|
|
||||||
#if 0
|
|
||||||
if (ferror (ifp))
|
|
||||||
g_set_error_literal (error, G_FILE_ERROR,
|
|
||||||
g_file_error_from_errno (errno),
|
|
||||||
g_strerror (errno));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
g_object_unref (input);
|
g_object_unref (input);
|
||||||
|
|
||||||
|
if (best_file_proc)
|
||||||
|
{
|
||||||
|
g_printerr ("best magic match on %s\n",
|
||||||
|
gimp_object_get_name (best_file_proc));
|
||||||
|
return best_file_proc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -414,7 +421,7 @@ file_check_single_magic (const gchar *offset,
|
|||||||
FileMatchType found = FILE_MATCH_NONE;
|
FileMatchType found = FILE_MATCH_NONE;
|
||||||
glong offs;
|
glong offs;
|
||||||
gulong num_testval;
|
gulong num_testval;
|
||||||
gulong num_operatorval;
|
gulong num_operator_val;
|
||||||
gint numbytes, k;
|
gint numbytes, k;
|
||||||
const gchar *num_operator_ptr;
|
const gchar *num_operator_ptr;
|
||||||
gchar num_operator;
|
gchar num_operator;
|
||||||
@ -461,18 +468,20 @@ file_check_single_magic (const gchar *offset,
|
|||||||
if (g_ascii_isdigit (num_operator_ptr[1]))
|
if (g_ascii_isdigit (num_operator_ptr[1]))
|
||||||
{
|
{
|
||||||
if (num_operator_ptr[1] != '0') /* decimal */
|
if (num_operator_ptr[1] != '0') /* decimal */
|
||||||
sscanf (num_operator_ptr+1, "%lu", &num_operatorval);
|
sscanf (num_operator_ptr+1, "%lu", &num_operator_val);
|
||||||
else if (num_operator_ptr[2] == 'x') /* hexadecimal */
|
else if (num_operator_ptr[2] == 'x') /* hexadecimal */
|
||||||
sscanf (num_operator_ptr+3, "%lx", &num_operatorval);
|
sscanf (num_operator_ptr+3, "%lx", &num_operator_val);
|
||||||
else /* octal */
|
else /* octal */
|
||||||
sscanf (num_operator_ptr+2, "%lo", &num_operatorval);
|
sscanf (num_operator_ptr+2, "%lo", &num_operator_val);
|
||||||
|
|
||||||
num_operator = *num_operator_ptr;
|
num_operator = *num_operator_ptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (numbytes > 0) /* Numerical test ? */
|
if (numbytes > 0)
|
||||||
{
|
{
|
||||||
|
/* Numerical test */
|
||||||
|
|
||||||
gchar num_test = '=';
|
gchar num_test = '=';
|
||||||
gulong fileval = 0;
|
gulong fileval = 0;
|
||||||
|
|
||||||
@ -489,8 +498,10 @@ file_check_single_magic (const gchar *offset,
|
|||||||
if (errno != 0)
|
if (errno != 0)
|
||||||
return FILE_MATCH_NONE;
|
return FILE_MATCH_NONE;
|
||||||
|
|
||||||
if (numbytes == 5) /* Check for file size ? */
|
if (numbytes == 5)
|
||||||
{
|
{
|
||||||
|
/* Check for file size */
|
||||||
|
|
||||||
GFileInfo *info = g_file_query_info (file,
|
GFileInfo *info = g_file_query_info (file,
|
||||||
G_FILE_ATTRIBUTE_STANDARD_SIZE,
|
G_FILE_ATTRIBUTE_STANDARD_SIZE,
|
||||||
G_FILE_QUERY_INFO_NONE,
|
G_FILE_QUERY_INFO_NONE,
|
||||||
@ -502,13 +513,17 @@ file_check_single_magic (const gchar *offset,
|
|||||||
g_object_unref (info);
|
g_object_unref (info);
|
||||||
}
|
}
|
||||||
else if (offs >= 0 &&
|
else if (offs >= 0 &&
|
||||||
(offs + numbytes <= headsize)) /* We have it in memory ? */
|
(offs + numbytes <= headsize))
|
||||||
{
|
{
|
||||||
|
/* We have it in memory */
|
||||||
|
|
||||||
for (k = 0; k < numbytes; k++)
|
for (k = 0; k < numbytes; k++)
|
||||||
fileval = (fileval << 8) | (glong) file_head[offs + k];
|
fileval = (fileval << 8) | (glong) file_head[offs + k];
|
||||||
}
|
}
|
||||||
else /* Read it from file */
|
else
|
||||||
{
|
{
|
||||||
|
/* Read it from file */
|
||||||
|
|
||||||
if (! g_seekable_seek (G_SEEKABLE (input), offs,
|
if (! g_seekable_seek (G_SEEKABLE (input), offs,
|
||||||
(offs >= 0) ? G_SEEK_SET : G_SEEK_END,
|
(offs >= 0) ? G_SEEK_SET : G_SEEK_END,
|
||||||
NULL, NULL))
|
NULL, NULL))
|
||||||
@ -532,20 +547,31 @@ file_check_single_magic (const gchar *offset,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (num_operator == '&')
|
if (num_operator == '&')
|
||||||
fileval &= num_operatorval;
|
fileval &= num_operator_val;
|
||||||
|
|
||||||
if (num_test == '<')
|
if (num_test == '<')
|
||||||
found = (fileval < num_testval);
|
{
|
||||||
|
if (fileval < num_testval)
|
||||||
|
found = numbytes;
|
||||||
|
}
|
||||||
else if (num_test == '>')
|
else if (num_test == '>')
|
||||||
found = (fileval > num_testval);
|
{
|
||||||
|
if (fileval > num_testval)
|
||||||
|
found = numbytes;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
found = (fileval == num_testval);
|
{
|
||||||
|
if (fileval == num_testval)
|
||||||
|
found = numbytes;
|
||||||
|
}
|
||||||
|
|
||||||
if (found && (numbytes == 5))
|
if (found && (numbytes == 5))
|
||||||
found = FILE_MATCH_SIZE;
|
found = FILE_MATCH_SIZE;
|
||||||
}
|
}
|
||||||
else if (numbytes == 0) /* String test */
|
else if (numbytes == 0)
|
||||||
{
|
{
|
||||||
|
/* String test */
|
||||||
|
|
||||||
gchar mem_testval[256];
|
gchar mem_testval[256];
|
||||||
|
|
||||||
file_convert_string (value,
|
file_convert_string (value,
|
||||||
@ -556,20 +582,23 @@ file_check_single_magic (const gchar *offset,
|
|||||||
return FILE_MATCH_NONE;
|
return FILE_MATCH_NONE;
|
||||||
|
|
||||||
if (offs >= 0 &&
|
if (offs >= 0 &&
|
||||||
(offs + numbytes <= headsize)) /* We have it in memory ? */
|
(offs + numbytes <= headsize))
|
||||||
{
|
{
|
||||||
found = (memcmp (mem_testval, file_head + offs, numbytes) == 0);
|
/* We have it in memory */
|
||||||
|
|
||||||
|
if (memcmp (mem_testval, file_head + offs, numbytes) == 0)
|
||||||
|
found = numbytes;
|
||||||
}
|
}
|
||||||
else /* Read it from file */
|
else
|
||||||
{
|
{
|
||||||
|
/* Read it from file */
|
||||||
|
|
||||||
if (! g_seekable_seek (G_SEEKABLE (input), offs,
|
if (! g_seekable_seek (G_SEEKABLE (input), offs,
|
||||||
(offs >= 0) ? G_SEEK_SET : G_SEEK_END,
|
(offs >= 0) ? G_SEEK_SET : G_SEEK_END,
|
||||||
NULL, NULL))
|
NULL, NULL))
|
||||||
return FILE_MATCH_NONE;
|
return FILE_MATCH_NONE;
|
||||||
|
|
||||||
found = FILE_MATCH_MAGIC;
|
for (k = 0; k < numbytes; k++)
|
||||||
|
|
||||||
for (k = 0; found && (k < numbytes); k++)
|
|
||||||
{
|
{
|
||||||
guchar byte;
|
guchar byte;
|
||||||
GError *error = NULL;
|
GError *error = NULL;
|
||||||
@ -579,12 +608,15 @@ file_check_single_magic (const gchar *offset,
|
|||||||
if (error)
|
if (error)
|
||||||
{
|
{
|
||||||
g_clear_error (&error);
|
g_clear_error (&error);
|
||||||
|
|
||||||
return FILE_MATCH_NONE;
|
return FILE_MATCH_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (byte != mem_testval[k])
|
if (byte != mem_testval[k])
|
||||||
found = FILE_MATCH_NONE;
|
return FILE_MATCH_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
found = numbytes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -599,36 +631,89 @@ file_check_magic_list (GSList *magics_list,
|
|||||||
GInputStream *input)
|
GInputStream *input)
|
||||||
|
|
||||||
{
|
{
|
||||||
|
gboolean and = FALSE;
|
||||||
|
gboolean found = FALSE;
|
||||||
|
FileMatchType best_match_val = FILE_MATCH_NONE;
|
||||||
|
FileMatchType match_val = FILE_MATCH_NONE;
|
||||||
|
|
||||||
|
for (; magics_list; magics_list = magics_list->next)
|
||||||
|
{
|
||||||
const gchar *offset;
|
const gchar *offset;
|
||||||
const gchar *type;
|
const gchar *type;
|
||||||
const gchar *value;
|
const gchar *value;
|
||||||
gboolean and = FALSE;
|
FileMatchType single_match_val = FILE_MATCH_NONE;
|
||||||
gboolean found = FALSE;
|
|
||||||
FileMatchType match_val;
|
|
||||||
|
|
||||||
while (magics_list)
|
if ((offset = magics_list->data) == NULL) return FILE_MATCH_NONE;
|
||||||
{
|
if ((magics_list = magics_list->next) == NULL) return FILE_MATCH_NONE;
|
||||||
if ((offset = magics_list->data) == NULL) break;
|
if ((type = magics_list->data) == NULL) return FILE_MATCH_NONE;
|
||||||
if ((magics_list = magics_list->next) == NULL) break;
|
if ((magics_list = magics_list->next) == NULL) return FILE_MATCH_NONE;
|
||||||
if ((type = magics_list->data) == NULL) break;
|
if ((value = magics_list->data) == NULL) return FILE_MATCH_NONE;
|
||||||
if ((magics_list = magics_list->next) == NULL) break;
|
|
||||||
if ((value = magics_list->data) == NULL) break;
|
|
||||||
|
|
||||||
magics_list = magics_list->next;
|
single_match_val = file_check_single_magic (offset, type, value,
|
||||||
|
|
||||||
match_val = file_check_single_magic (offset, type, value,
|
|
||||||
head, headsize,
|
head, headsize,
|
||||||
file, input);
|
file, input);
|
||||||
|
|
||||||
if (and)
|
if (and)
|
||||||
found = found && (match_val != FILE_MATCH_NONE);
|
found = found && (single_match_val != FILE_MATCH_NONE);
|
||||||
else
|
else
|
||||||
found = (match_val != FILE_MATCH_NONE);
|
found = (single_match_val != FILE_MATCH_NONE);
|
||||||
|
|
||||||
|
if (match_val == FILE_MATCH_NONE)
|
||||||
|
{
|
||||||
|
/* if we have no match yet, this is it in any case */
|
||||||
|
|
||||||
|
match_val = single_match_val;
|
||||||
|
}
|
||||||
|
else if (single_match_val != FILE_MATCH_NONE)
|
||||||
|
{
|
||||||
|
/* else if we have a match on this one, combine it with the
|
||||||
|
* existing return value
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (single_match_val == FILE_MATCH_SIZE)
|
||||||
|
{
|
||||||
|
/* if we already have a magic match, simply increase
|
||||||
|
* that by one to indicate "better match", not perfect
|
||||||
|
* but better than losing the additional size match
|
||||||
|
* entirely
|
||||||
|
*/
|
||||||
|
if (match_val != FILE_MATCH_SIZE)
|
||||||
|
match_val += 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* if we already have a magic match, simply add to its
|
||||||
|
* length; otherwise if we already have a size match,
|
||||||
|
* combine it with this match, see comment above
|
||||||
|
*/
|
||||||
|
if (match_val != FILE_MATCH_SIZE)
|
||||||
|
match_val += single_match_val;
|
||||||
|
else
|
||||||
|
match_val = single_match_val + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (best_match_val == FILE_MATCH_NONE)
|
||||||
|
{
|
||||||
|
/* if we have no best match yet, this is it */
|
||||||
|
|
||||||
|
best_match_val = match_val;
|
||||||
|
}
|
||||||
|
else if (match_val != FILE_MATCH_NONE)
|
||||||
|
{
|
||||||
|
/* otherwise if this was a match, update the best match, note
|
||||||
|
* that by using MAX we will not overwrite a magic match
|
||||||
|
* with a size match
|
||||||
|
*/
|
||||||
|
|
||||||
|
best_match_val = MAX (best_match_val, match_val);
|
||||||
|
}
|
||||||
|
|
||||||
and = (strchr (offset, '&') != NULL);
|
and = (strchr (offset, '&') != NULL);
|
||||||
|
|
||||||
if (! and && found)
|
if (! and)
|
||||||
return match_val;
|
match_val = FILE_MATCH_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
return FILE_MATCH_NONE;
|
return best_match_val;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user