Ergun Biçici.
Local Context Selection for Aligning Sentences in Parallel Corpora.
Lecture Notes in Artificial Intelligence,
4635:82-93,
2007.
Note: 6th International and Interdisciplinary Conf. on Modeling and Using Context (CONTEXT 2007).
[PDF]
Keyword(s): Artificial Intelligence,
Machine Translation.
Abstract:
This paper presents a novel language independent context-based sentence alignment technique given parallel corpora. We can view the problem of aligning sentences as finding translations of sentences chosen from different sources. Unlike current approaches which rely on pre-defined features and models, our algorithm employs features derived from the distributional properties of sentences and does not use any language dependent knowledge. We make use of the context of sentences and introduce the notion of Zipfian word vectors which effectively models the distributional properties of a given sentence. We accept the context to be the frame in which the reasoning about sentence alignment is done. We examine alternatives for local context models and demonstrate that our context based sentence alignment algorithm performs better than prominent sentence alignment techniques. Our system dynamically selects the local context for a pair of set of sentences which maximizes the correlation. We evaluate the performance of our system based on two different measures: sentence alignment accuracy and sentence alignment coverage. We compare the performance of our system with commonly used sentence alignment systems and show that our system performs 1.1951 to 1.5404 times better in reducing the error rate in alignment accuracy and coverage.
@comment{Conference paper (CONTEXT 2007); the proceedings are volume 4635 of the Lecture Notes in Artificial Intelligence series.}
@inproceedings{Bicici:CONTEXT07,
  author    = {Bi{\c{c}}ici, Ergun},
  title     = {Local Context Selection for Aligning Sentences in Parallel Corpora},
  booktitle = {6th International and Interdisciplinary Conf. on Modeling and Using Context ({CONTEXT} 2007)},
  series    = {Lecture Notes in Artificial Intelligence},
  volume    = {4635},
  pages     = {82--93},
  year      = {2007},
  address   = {Roskilde, Denmark},
  keywords  = {Artificial Intelligence, Machine Translation},
  pdf       = {http://bicici.github.io/publications/2007/ContextSelectionAModels.pdf},
  abstract  = {This paper presents a novel language independent context-based sentence alignment technique given parallel corpora. We can view the problem of aligning sentences as finding translations of sentences chosen from different sources. Unlike current approaches which rely on pre-defined features and models, our algorithm employs features derived from the distributional properties of sentences and does not use any language dependent knowledge. We make use of the context of sentences and introduce the notion of Zipfian word vectors which effectively models the distributional properties of a given sentence. We accept the context to be the frame in which the reasoning about sentence alignment is done. We examine alternatives for local context models and demonstrate that our context based sentence alignment algorithm performs better than prominent sentence alignment techniques. Our system dynamically selects the local context for a pair of set of sentences which maximizes the correlation. We evaluate the performance of our system based on two different measures: sentence alignment accuracy and sentence alignment coverage. We compare the performance of our system with commonly used sentence alignment systems and show that our system performs 1.1951 to 1.5404 times better in reducing the error rate in alignment accuracy and coverage.},
}
Ergun Biçici and Deniz Yuret.
Locally Scaled Density Based Clustering.
Lecture Notes in Computer Science,
4431:739-748,
2007.
Note: Presentation
LSDBC README
LSDBC Linux Executable (For academic and research purposes)
LSDBC Linux Executable (no noise detection) (For academic and research purposes).
ISBN: 978-3-540-71589-4.
[PDF]
Keyword(s): Machine Learning.
Abstract:
Density based clustering methods allow the identification of arbitrary, not necessarily convex regions of data points that are densely populated. The number of clusters does not need to be specified beforehand; a cluster is defined to be a connected region that exceeds a given density threshold. This paper introduces the notion of local scaling in density based clustering, which determines the density threshold based on the local statistics of the data. The local maxima of density are discovered using a k-nearest-neighbor density estimation and used as centers of potential clusters. Each cluster is grown until the density falls below a pre-specified ratio of the center point's density. The resulting clustering technique is able to identify clusters of arbitrary shape on noisy backgrounds that contain significant density gradients. The focus of this paper is to automate the process of clustering by making use of the local density information for arbitrarily sized, shaped, located, and numbered clusters. The performance of the new algorithm is promising as it is demonstrated on a number of synthetic datasets and images for a wide range of its parameters.
@comment{Conference paper (ICANNGA 2007); the proceedings are volume 4431 of the Lecture Notes in Computer Science series.}
@inproceedings{Bicici:ICANNGA07,
  author    = {Bi{\c{c}}ici, Ergun and Yuret, Deniz},
  title     = {Locally Scaled Density Based Clustering},
  booktitle = {8th International Conf. on Adaptive and Natural Computing Algorithms ({ICANNGA} 2007)},
  series    = {Lecture Notes in Computer Science},
  volume    = {4431},
  pages     = {739--748},
  year      = {2007},
  address   = {Warsaw, Poland},
  isbn      = {978-3-540-71589-4},
  keywords  = {Machine Learning},
  pdf       = {http://bicici.github.io/publications/2007/LSDBC/LSDBC-icannga07.pdf},
  abstract  = {Density based clustering methods allow the identification of arbitrary, not necessarily convex regions of data points that are densely populated. The number of clusters does not need to be specified beforehand; a cluster is defined to be a connected region that exceeds a given density threshold. This paper introduces the notion of local scaling in density based clustering, which determines the density threshold based on the local statistics of the data. The local maxima of density are discovered using a k-nearest-neighbor density estimation and used as centers of potential clusters. Each cluster is grown until the density falls below a pre-specified ratio of the center point's density. The resulting clustering technique is able to identify clusters of arbitrary shape on noisy backgrounds that contain significant density gradients. The focus of this paper is to automate the process of clustering by making use of the local density information for arbitrarily sized, shaped, located, and numbered clusters. The performance of the new algorithm is promising as it is demonstrated on a number of synthetic datasets and images for a wide range of its parameters.},
  note      = {Presentation
LSDBC README
LSDBC Linux Executable (For academic and research purposes)
LSDBC Linux Executable (no noise detection) (For academic and research purposes)},
}