2006
Marcel Worring, Cees G M Snoek, Bouke Huurnink, Jan C van Gemert, Dennis Koelma, Ork de Rooij: The MediaMill Large-lexicon Concept Suggestion Engine. MM, pp. 785–786, Santa Barbara, USA, 2006. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/worring-demo-acm2006.pdf)

In this technical demonstration we show the current version of the MediaMill system, a search engine that facilitates access to news video archives at a semantic level. The core of the system is a lexicon of 436 automatically detected semantic concepts. To handle such a large lexicon in retrieval, an engine is developed which automatically selects a set of relevant concepts based on the textual query and example images. The result set can be browsed easily to obtain the final result for the query.
Marcel Worring, Cees G M Snoek, Ork de Rooij, Giang P Nguyen, Dennis C Koelma: Lexicon-based Browsers for Searching in News Video Archives. ICPR, pp. 1256–1259, Hong Kong, China, 2006. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/worring-browsers-icpr2006.pdf)

In this paper we present the methods and visualizations used in the MediaMill video search engine. The basis for the engine is a semantic indexing process which derives a lexicon of 101 concepts. To support the user in navigating the collection, the system defines a visual similarity space, a semantic similarity space, a semantic thread space, and browsers to explore them. The search system is evaluated within the TRECVID benchmark. We obtain a top-3 result for 19 out of 24 search topics. In addition, we obtain the highest mean average precision of all search participants.
Cees G M Snoek, Marcel Worring, Dennis C Koelma, Arnold W M Smeulders: Learned Lexicon-driven Interactive Video Retrieval. H Sundaram et al. (Eds.): CIVR, LNCS vol. 4071, pp. 11–20, Springer-Verlag, Heidelberg, Germany, 2006. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/snoek-lexicon-civr2006.pdf)

We combine in this paper automatic learning of a large lexicon of semantic concepts with traditional video retrieval methods into a novel approach to narrow the semantic gap. The core of the proposed solution is formed by the automatic detection of an unprecedented lexicon of 101 concepts. From there, we explore the combination of query-by-concept, query-by-example, query-by-keyword, and user interaction into the MediaMill semantic video search engine. We evaluate the search engine against the 2005 NIST TRECVID video retrieval benchmark, using an international broadcast news archive of 85 hours. Top ranking results show that the lexicon-driven search engine is highly effective for interactive video retrieval.
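To make the query-by-concept idea above concrete, here is a minimal Python sketch of lexicon-driven ranking. It is illustrative only, not the MediaMill implementation: the shot names, detector probabilities, ASR strings, and the product-based score combination are all hypothetical.

```python
import numpy as np

# Toy index for 6 video shots: per-shot detector probabilities for two
# lexicon concepts, plus ASR keywords (all values hypothetical).
shots = ["s0", "s1", "s2", "s3", "s4", "s5"]
p_concept = {
    "aircraft": np.array([0.9, 0.1, 0.7, 0.2, 0.4, 0.8]),
    "sky":      np.array([0.6, 0.3, 0.8, 0.1, 0.5, 0.7]),
}
asr = ["plane lands", "market report", "jet takes off",
       "weather", "sports", "airport strike"]

def query(concepts, keyword=None):
    """Rank shots by the product of selected concept scores,
    optionally boosting shots whose ASR text matches the keyword."""
    score = np.ones(len(shots))
    for c in concepts:
        score *= p_concept[c]                  # query-by-concept
    if keyword:                                # query-by-keyword
        score *= np.array([2.0 if keyword in t else 1.0 for t in asr])
    return [shots[i] for i in np.argsort(-score)]

print(query(["aircraft", "sky"], keyword="jet"))
```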
Cees G M Snoek, Marcel Worring, Jan-Mark Geusebroek, Dennis C Koelma, Frank J Seinstra, Arnold W M Smeulders: The Semantic Pathfinder for Generic News Video Indexing. ICME, pp. 1469–1472, Toronto, Canada, 2006. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/snoek-pathfinder-icme2006.pdf)

This paper presents the semantic pathfinder architecture for generic indexing of video archives. The pathfinder automatically extracts semantic concepts from video based on the exploration of different paths through three consecutive analysis steps, closely linked to the video production process, namely: content analysis, style analysis, and context analysis. The virtue of the semantic pathfinder is its learned ability to find a best path of analysis steps on a per-concept basis. To show the generality of this indexing approach we develop detectors for a lexicon of 32 concepts and we evaluate the semantic pathfinder against the 2004 NIST TRECVID video retrieval benchmark, using a news archive of 64 hours. Top ranking performance indicates the merit of the semantic pathfinder.
Marcel Worring, Cees G M Snoek, Ork de Rooij, Giang P Nguyen, Richard van Balen, Dennis C Koelma: MediaMill: Advanced Browsing in News Video Archives. H Sundaram et al. (Eds.): CIVR, LNCS vol. 4071, pp. 533–536, Springer-Verlag, Heidelberg, Germany, 2006. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/worring-demo-civr2006.pdf)

In this paper we present our MediaMill video search engine. The basis for the engine is a semantic indexing process which derives a lexicon of 101 concepts. To support the user in navigating the collection, the system defines a visual similarity space, a semantic similarity space, a semantic thread space, and browsers to explore them. It extends upon [1] with improved browsing tools. The search system is evaluated within the TRECVID benchmark [2]. We obtain a top-3 result for 19 out of 24 search topics. In addition, we obtain the highest mean average precision of all search participants.
Jan C van Gemert, Jan-Mark Geusebroek, Cor J Veenman, Cees G M Snoek, Arnold W M Smeulders: Robust Scene Categorization by Learning Image Statistics in Context. CVPR workshop, pp. 105–112, New York, USA, 2006. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/gemert-scene-slam2006.pdf)

We present a generic and robust approach for scene categorization. A complex scene is described by proto-concepts like vegetation, water, fire, sky, etc. These proto-concepts are represented by low level features, where we use natural image statistics to compactly represent color invariant texture information by a Weibull distribution. We introduce the notion of contextures, which preserve the context of textures in a visual scene with an occurrence histogram (context) of similarities to proto-concept descriptors (texture). In contrast to a codebook approach, we use the similarity to all vocabulary elements to generalize beyond the code words. Visual descriptors are attained by combining different types of contexts with different texture parameters. The visual scene descriptors are generalized to visual categories by training a support vector machine. We evaluate our approach on 3 different datasets: 1) 50 categories for the TRECVID video dataset; 2) the Caltech 101-object images; 3) 89 categories being the intersection of the Corel photo stock with the Art Explosion photo stock. Results show that our approach is robust over different datasets, while maintaining competitive performance.
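The contexture idea (an occurrence histogram of similarities to all proto-concept descriptors, rather than hard codebook assignment) can be sketched in a few lines. Hedges apply: the paper represents texture by Weibull-distribution parameters with its own similarity measure, while the Gaussian kernel on Euclidean distances below is a stand-in, and all names and data are hypothetical.

```python
import numpy as np

def contexture_descriptor(patch_feats, prototypes, gamma=1.0):
    """Describe a scene by its similarities to proto-concept prototypes.

    patch_feats : (n_patches, d) array of per-patch texture features
    prototypes  : (n_protos, d) array of proto-concept descriptors
    Returns an (n_protos,) occurrence histogram: unlike a hard codebook
    assignment, every prototype receives a soft similarity vote.
    """
    # Squared Euclidean distance from every patch to every prototype.
    d2 = ((patch_feats[:, None, :] - prototypes[None, :, :]) ** 2).sum(-1)
    sim = np.exp(-gamma * d2)          # soft similarity in (0, 1]
    return sim.mean(axis=0)            # aggregate patches -> histogram

# Toy usage: 200 patches with 8-dim features, 12 proto-concepts.
rng = np.random.default_rng(0)
desc = contexture_descriptor(rng.normal(size=(200, 8)),
                             rng.normal(size=(12, 8)))
print(desc.shape)  # (12,)
```

The resulting fixed-length descriptor could then be fed to a support vector machine, as the abstract describes.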
Cees G M Snoek, Marcel Worring, Alexander G Hauptmann: Learning Rich Semantics from News Video Archives by Style Analysis. ACM Transactions on Multimedia Computing, Communications and Applications, 2 (2), pp. 91–108, 2006. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/snoek-style-tomccap.pdf)

We propose a generic and robust framework for news video indexing, which we base on a broadcast news production model. We identify within this model four production phases, each providing useful metadata for annotation. In contrast to semi-automatic indexing approaches, which exploit this information at production time, we adhere to an automatic data-driven approach. To that end, we analyze a digital news video using a separate set of multimodal detectors for each production phase. By combining the resulting production-derived features into a statistical classifier ensemble, the framework facilitates robust classification of several rich semantic concepts in news video; rich meaning that concepts share many similarities in their production process. Experiments on an archive of 120 hours of news video, from the 2003 TRECVID benchmark, show that a combined analysis of production phases yields the best results. In addition, we demonstrate that the accuracy of the proposed style analysis framework for classification of several rich semantic concepts is state-of-the-art.
Arnold W M Smeulders, Jan C van Gemert, Jan-Mark Geusebroek, Cees G M Snoek, Marcel Worring: Browsing for the National Dutch Video Archive. ISCCSP, Marrakech, Morocco, 2006.

Pictures have always been a prime carrier of Dutch culture. But pictures take a new form. We live in times of broad- and narrowcasting through Internet, of passive and active viewers, of direct or delayed broadcast, and of digital pictures being delivered in the museum or at home. At the same time, the picture and television archives turn digital. Archives are going to be swamped with information requests unless they swiftly adapt to partially automatic annotation and digital retrieval. Our aim is to provide faster and more complete access to picture archives by digital analysis. Our approach consists of a multimedia analysis of features of pictures in tandem with the language that describes those pictures, under the guidance of a visual ontology. The general scientific paradigm we address is the detection of directly observables fused into semantic features learned from large repositories of digital video. We use invariant, natural-image-statistics-based contextual feature sets for capturing the concepts of images and integrate that as early as possible with text. The system consists of a set of visual concepts, large for science yet small for practice, permitting the retrieval of semantically formulated queries. We will demonstrate a PC-based, off-line trained, state-of-the-art system for browsing broadcast news archives.
Cees G M Snoek, Marcel Worring, Dennis C Koelma, Arnold W M Smeulders: Learned Lexicon-driven Interactive Video Retrieval (reprint). M Wilkinson, J Pluim (Eds.): Fifth Quinquennial Review 2001-2006, Dutch Society for Pattern Recognition and Image Processing, 2006.
2005
Cees G M Snoek, Marcel Worring, Arnold W M Smeulders: Early versus Late Fusion in Semantic Video Analysis. MM, pp. 399–402, Singapore, 2005 (SIGMM test of time paper award, honourable mention). [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/snoek-earlylate-acm2005.pdf)

Semantic analysis of multimodal video aims to index segments of interest at a conceptual level. In reaching this goal, it requires an analysis of several information streams. At some point in the analysis these streams need to be fused. In this paper, we consider two classes of fusion schemes, namely early fusion and late fusion. The former fuses modalities in feature space, the latter fuses modalities in semantic space. We show by experiment on 184 hours of broadcast video data and for 20 semantic concepts, that late fusion tends to give slightly better performance for most concepts. However, for those concepts where early fusion performs better the difference is more significant.
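A minimal sketch of the two fusion schemes, assuming SVM classifiers and probability averaging for the late-fusion combination (the paper does not prescribe this exact combination rule; the data and variable names are hypothetical):

```python
import numpy as np
from sklearn.svm import SVC

# Toy data: 100 shots with visual (20-dim) and textual (50-dim) features.
rng = np.random.default_rng(1)
X_vis = rng.normal(size=(100, 20))
X_txt = rng.normal(size=(100, 50))
y = rng.integers(0, 2, size=100)

# Early fusion: concatenate modality features, learn a single classifier.
early = SVC(probability=True).fit(np.hstack([X_vis, X_txt]), y)

# Late fusion: one classifier per modality, combine scores in semantic space.
vis_clf = SVC(probability=True).fit(X_vis, y)
txt_clf = SVC(probability=True).fit(X_txt, y)
late_score = 0.5 * (vis_clf.predict_proba(X_vis)[:, 1] +
                    txt_clf.predict_proba(X_txt)[:, 1])
```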
Cees G M Snoek, Marcel Worring, Jan van Gemert, Jan-Mark Geusebroek, Dennis Koelma, Giang P Nguyen, Ork de Rooij, Frank Seinstra: MediaMill: Exploring News Video Archives based on Learned Semantics. MM, pp. 225–226, Singapore, 2005 (Best technical demonstration award). [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/snoek-demo-acm2005.pdf)

In this technical demonstration we showcase the MediaMill system, a search engine that facilitates access to news video archives at a semantic level. The core of the system is an unprecedented lexicon of 100 automatically detected semantic concepts. Based on this lexicon we demonstrate how users can obtain highly relevant retrieval results using query-by-concept. In addition, we show how the lexicon of concepts can be exploited for novel applications using advanced semantic visualizations. Several aspects of the MediaMill system are evaluated as part of our TRECVID 2005 efforts.
Cees G M Snoek, Jan C van Gemert, Jan-Mark Geusebroek, Bouke Huurnink, Dennis C Koelma, Giang P Nguyen, Ork de Rooij, Frank J Seinstra, Arnold W M Smeulders, Cor J Veenman, Marcel Worring: The MediaMill TRECVID 2005 Semantic Video Search Engine. TRECVID, Gaithersburg, USA, 2005. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/UvA-MM_TRECVID2005.pdf)

In this paper we describe our TRECVID 2005 experiments. The UvA-MediaMill team participated in four tasks. For the detection of camera work (runid: A_CAM) we investigate the benefit of using a tessellation of detectors in combination with supervised learning over a standard approach using global image information. Experiments indicate that average precision results increase drastically, especially for pan (+51%) and tilt (+28%). For concept detection we propose a generic approach using our semantic pathfinder. Most important novelty compared to last year's system is the improved visual analysis using proto-concepts based on Wiccest features. In addition, the path selection mechanism was extended. Based on the semantic pathfinder architecture we are currently able to detect an unprecedented lexicon of 101 semantic concepts in a generic fashion. We performed a large set of experiments (runid: B_vA). The results show that an optimal strategy for generic multimedia analysis is one that learns from the training set on a per-concept basis which tactic to follow. Experiments also indicate that our visual analysis approach is highly promising. The lexicon of 101 semantic concepts forms the basis for our search experiments (runid: B_2_A-MM). We participated in automatic, manual (using only visual information), and interactive search. The lexicon-driven retrieval paradigm aids substantially in all search tasks. When coupled with interaction, exploiting several novel browsing schemes of our semantic video search engine, results are excellent. We obtain a top-3 result for 19 out of 24 search topics. In addition, we obtain the highest mean average precision of all search participants. We exploited the technology developed for the above tasks to explore the BBC rushes. Most intriguing result is that from the lexicon of 101 visual-only models trained for news data 25 concepts perform reasonably well on BBC data also.
Cees G M Snoek: The Authoring Metaphor to Machine Understanding of Multimedia. PhD thesis, Universiteit van Amsterdam, 2005. ISBN: 9057761432. [PDF](https://pure.uva.nl/ws/files/3927507/38070_snoek_thesis.pdf)

This thesis makes a contribution to the field of multimedia understanding, where our ultimate aim is to structure the digital multimedia chaos by bridging the semantic gap between computable data features on one end and the semantic interpretation of the data by a user on the other end. We distinguish between produced and non-produced multimedia or video documents. We depart from the view that a produced video is the result of an authoring-driven production process. This authoring process serves as a metaphor for machine-driven understanding. We present a step-by-step extrapolation of this authoring metaphor for automatic multimedia understanding. While doing so, we cover in this thesis an extensive overview of the field, a theoretical foundation for authoring-driven multimedia understanding, state-of-the-art benchmark validation, and practical semantic video retrieval applications.
Cees G M Snoek, Marcel Worring: Multimedia Event-Based Video Indexing using Time Intervals. IEEE Transactions on Multimedia, 7 (4), pp. 638–647, 2005. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/snoek-time-mm.pdf)

We propose the Time Interval Multimedia Event (TIME) framework as a robust approach for classification of semantic events in multimodal video documents. The representation used in TIME extends the Allen time relations and allows for proper inclusion of context and synchronization of the heterogeneous information sources involved in multimodal video analysis. To demonstrate the viability of our approach, it was evaluated on the domains of soccer and news broadcasts. For automatic classification of semantic events, we compare three different machine learning techniques, i.c. C4.5 decision tree, Maximum Entropy, and Support Vector Machine. The results show that semantic video indexing results significantly benefit from using the TIME framework.
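As background for the TIME representation, the following sketch classifies the Allen temporal relation between two time intervals (for example a camera shot and a speech segment). TIME builds on such relations, but this is illustrative background only, not the paper's actual feature encoding.

```python
def allen(a, b):
    """Return one of the 13 Allen temporal relations between
    intervals a = (start, end) and b = (start, end), with start < end."""
    (s1, e1), (s2, e2) = a, b
    if e1 < s2: return "before"
    if e2 < s1: return "after"
    if e1 == s2: return "meets"
    if e2 == s1: return "met-by"
    if s1 == s2 and e1 == e2: return "equals"
    if s1 == s2: return "starts" if e1 < e2 else "started-by"
    if e1 == e2: return "finishes" if s1 > s2 else "finished-by"
    if s2 < s1 and e1 < e2: return "during"
    if s1 < s2 and e2 < e1: return "contains"
    return "overlaps" if s1 < s2 else "overlapped-by"

print(allen((0, 5), (3, 9)))   # overlaps
print(allen((2, 4), (0, 10)))  # during
```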
Cees G M Snoek, Marcel Worring, Jan-Mark Geusebroek, Dennis C Koelma, Frank J Seinstra: On the Surplus Value of Semantic Video Analysis Beyond the Key Frame. ICME, Amsterdam, The Netherlands, 2005. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/snoek-surplus-icme2005.pdf)

Typical semantic video analysis methods aim for classification of camera shots based on extracted features from a single key frame only. In this paper, we sketch a video analysis scenario and evaluate the benefit of analysis beyond the key frame for semantic concept detection performance. We developed detectors for a lexicon of 26 concepts, and evaluated their performance on 120 hours of video data. Results show that, on average, detection performance can increase with almost 40% when the analysis method takes more visual content into account.
Cees G M Snoek, Dennis Koelma, Jeroen van Rest, Nellie Schipper, Frank J Seinstra, Andrew Thean, Marcel Worring: MediaMill: Searching Multimedia Archives based on Learned Semantics. ICME, Amsterdam, The Netherlands, 2005. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/snoek-mediamill-icme2005.pdf)

Video is about to conquer the Internet. Real-time delivery of video content is technically possible to any desktop and mobile device, even with modest connections. The main problem hampering massive (re)usage of video content today is the lack of effective content based tools that provide semantic access. In this contribution we discuss systems for both video analysis and video retrieval that facilitate semantic access to video sources. Both systems were evaluated in the 2004 TRECVID benchmark as top performers in their task.
Frank J Seinstra, Cees G M Snoek, Dennis C Koelma, Jan-Mark Geusebroek, Marcel Worring: User Transparent Parallel Processing of the 2004 NIST TRECVID Data Set. IPDPS, pp. 90–97, Denver, USA, 2005. [PDF](http://staff.science.uva.nl/~fjseins/Papers/Conferences/ipdps2005.pdf)

The Parallel-Horus framework, developed at the University of Amsterdam, is a unique software architecture that allows non-expert parallel programmers to develop fully sequential multimedia applications for efficient execution on homogeneous Beowulf-type commodity clusters. Previously obtained results for realistic, but relatively small-sized applications have shown the feasibility of the Parallel-Horus approach, with parallel performance consistently being found to be optimal with respect to the abstraction level of message passing programs. In this paper we discuss the most serious challenge Parallel-Horus has had to deal with so far: the processing of over 184 hours of video included in the 2004 NIST TRECVID evaluation, i.e. the de facto international standard benchmark for content-based video retrieval. Our results and experiences confirm that Parallel-Horus is a very powerful support tool for state-of-the-art research and applications in multimedia processing.
Cees G M Snoek, Marcel Worring: Multimedia Pattern Recognition in Soccer Video using Time Intervals. Classification, the Ubiquitous Challenge: Proceedings of the 28th Annual Conference of the Gesellschaft für Klassifikation e.V. (University of Dortmund, March 9–11, 2004), Studies in Classification, Data Analysis, and Knowledge Organization, pp. 97–108, Springer-Verlag, Berlin, Germany, 2005.

In this paper we propose the Time Interval Multimedia Event (TIME) framework as a robust approach for recognition of multimedia patterns, e.g. highlight events, in soccer video. The representation used in TIME extends the Allen temporal interval relations and allows for proper inclusion of context and synchronization of the heterogeneous information sources involved in multimedia pattern recognition. For automatic classification of highlights in soccer video, we compare three different machine learning techniques, i.c. C4.5 decision tree, Maximum Entropy, and Support Vector Machine. It was found that by using the TIME framework the amount of video a user has to watch in order to see almost all highlights can be reduced considerably, especially in combination with a Support Vector Machine.
Cees G M Snoek, Marcel Worring: Multimodal Video Indexing: A Review of the State-of-the-art. Multimedia Tools and Applications, 25 (1), pp. 5–35, 2005. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/snoek-review-mmta.pdf)

Efficient and effective handling of video documents depends on the availability of indexes. Manual indexing is unfeasible for large video collections. In this paper we survey several methods aiming at automating this time and resource consuming process. Good reviews on single modality based video indexing have appeared in literature. Effective indexing, however, requires a multimodal approach in which either the most appropriate modality is selected or the different modalities are used in collaborative fashion. Therefore, instead of separately treating the different information sources involved, and their specific algorithms, we focus on the similarities and differences between the modalities. To that end we put forward a unifying and multimodal framework, which views a video document from the perspective of its author. This framework forms the guiding principle for identifying index types, for which automatic methods are found in literature. It furthermore forms the basis for categorizing these different methods.
2004
Cees G M Snoek, Marcel Worring, Jan-Mark Geusebroek, Dennis C Koelma, Frank J Seinstra: The MediaMill TRECVID 2004 Semantic Video Search Engine. TRECVID, Gaithersburg, USA, 2004. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/UvA-MM_TRECVID2004.pdf)

This year the UvA-MediaMill team participated in the Feature Extraction and Search Task. We developed a generic approach for semantic concept classification using the semantic value chain. The semantic value chain extracts concepts from video documents based on three consecutive analysis links, named the content link, the style link, and the context link. Various experiments within the analysis links were performed, showing amongst others the merit of processing beyond key frames, the value of style elements, and the importance of learning semantic context. For all experiments a lexicon of 32 concepts was exploited, 10 of which are part of the Feature Extraction Task. Top three system-based ranking in 8 out of the 10 benchmark concepts indicates that our approach is very promising. Apart from this, the lexicon of 32 concepts proved very useful in an interactive search scenario with our semantic video search engine, where we obtained the highest mean average precision of all participants.
Cees G M Snoek, Marcel Worring, Alexander G Hauptmann: Detection of TV News Monologues by Style Analysis. ICME, Taipei, Taiwan, 2004. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/snoek-style-icme2004.pdf)

We propose a method for detection of semantic concepts in produced video based on style analysis. Recognition of concepts is done by applying a classifier ensemble to the detected style elements. As a case study we present a method for detecting the concept of news subject monologues. Our approach had the best average precision performance amongst 26 submissions in the 2003 TRECVID benchmark.
2003
Alexander Hauptmann, Robert V Baron, Ming-Yu Chen, Michael Christel, Pinar Duygulu, Chang Huang, Rong Jin, Wei-Hao Lin, Dorbin Ng, Neema Moraveji, Norman Papernick, Cees G M Snoek, George Tzanetakis, Jun Yang, Rong Yan, Howard D Wactlar: Informedia at TRECVID 2003: Analyzing and Searching Broadcast News Video. TRECVID, Gaithersburg, USA, 2003. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/TREC03Informedia.pdf)
Cees G M Snoek, Marcel Worring: Time Interval Maximum Entropy based Event Indexing in Soccer Video. ICME, pp. 481–484, Baltimore, USA, 2003. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/icme2003.pdf)

Multimodal indexing of events in video documents poses problems with respect to representation, inclusion of contextual information, and synchronization of the heterogeneous information sources involved. In this paper we present the Time Interval Maximum Entropy (TIME) framework that tackles aforementioned problems. To demonstrate the viability of TIME for event classification in multimodal video, an evaluation was performed on the domain of soccer broadcasts. It was found that by applying TIME, the amount of video a user has to watch in order to see almost all highlights can be reduced considerably.
Cees G M Snoek, Marcel Worring: Goalgle: A Soccer Video Search Engine. ICME, Baltimore, USA, 2003. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/goalgle.pdf)

Goalgle is a prototype search engine for soccer video. Browsing and retrieval functionality is provided by means of a web based interface. This interface allows users to jump to video segments from a collection of prerecorded and analyzed soccer matches based on queries on specific players, events, matches, and/or text. In this contribution we discuss the system architecture and functionality of the Goalgle soccer video search engine.
2002
Jeroen Vendrig, Jurgen den Hartog, David van Leeuwen, Ioannis Patras, Stephan Raaijmakers, Jeroen van Rest, Cees G M Snoek, Marcel Worring: TREC Feature Extraction by Active Learning. TREC, Gaithersburg, USA, 2002. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/trec2002.pdf)
Cees G M Snoek, Marcel Worring: A Review on Multimodal Video Indexing. ICME, vol. 2, pp. 21–24, Lausanne, Switzerland, 2002. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/icme2002.pdf)

Efficient and effective handling of video documents depends on the availability of indexes. Manual indexing is unfeasible for large video collections. Efficient, single modality based, video indexing methods have appeared in literature. Effective indexing, however, requires a multimodal approach in which either the most appropriate modality is selected or the different modalities are used in collaborative fashion. In this paper we present a framework for multimodal video indexing, which views a video document from the perspective of its author. The framework serves as a blueprint for a generic and flexible multimodal video indexing system, and generalizes different state-of-the-art video indexing methods. It furthermore forms the basis for categorizing these different methods.
Marcel Worring, Andrew Bagdanov, Jan C van Gemert, Jan-Mark Geusebroek, Minh Hoang, Guus Schreiber, Cees G M Snoek, Jeroen Vendrig, Jan Wielemaker, Arnold W M Smeulders: Interactive Indexing and Retrieval of Multimedia Content. Proceedings of the Annual Conference on Current Trends in Theory and Practice of Informatics, Lecture Notes in Computer Science vol. 2540, pp. 135–148, Springer-Verlag, Milovy, Czech Republic, 2002. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/sofsem2002.pdf)

The indexing and retrieval of multimedia items is difficult due to the semantic gap between the user's perception of the data and the descriptions we can derive automatically from the data using computer vision, speech recognition, and natural language processing. In this contribution we consider the nature of the semantic gap in more detail and show examples of methods that help in limiting the gap. These methods can be automatic, but in general the indexing and retrieval of multimedia items should be a collaborative process between the system and the user. We show how to employ the user's interaction for limiting the semantic gap.
2001
Jan Baan, Alex van Ballegooij, Jan-Mark Geusebroek, Djoerd Hiemstra, Jurgen den Hartog, Johan List, Cees G M Snoek, Ioannis Patras, Stephan Raaijmakers, Leon Todoran, Jeroen Vendrig, Arjen de Vries, Thijs Westerveld, Marcel Worring: Lazy Users and Automatic Video Retrieval Tools in (the) Lowlands. TREC, Gaithersburg, USA, 2001. [PDF](http://isis-data.science.uva.nl/cgmsnoek/pub/lowlands01.pdf)
This material is presented to ensure timely dissemination of scholarly and technical work. Copyright and all rights therein are retained by authors or by other copyright holders. All persons copying this information are expected to adhere to the terms and constraints invoked by each author’s copyright. In most cases, these works may not be reposted without the explicit permission of the copyright holder.