Document (#37963)

Author
Ramisch, C.
Schreiner, P.
Idiart, M.
Villavicencio, A.
Title
An evaluation of methods for the extraction of multiword expressions
Source
http://www.inf.ufrgs.br/pln/nlp/papers/mwe-08-lrec.pdf
Year
2008
Abstract
This paper focuses on the evaluation of some methods for the automatic acquisition of Multiword Expressions (MWEs). First we investigate the hypothesis that MWEs can be detected solely by the distinct statistical properties of their component words, regardless of their type, comparing 3 statistical measures: Mutual Information, χ² and Permutation Entropy. Moreover, we also look at the impact that the addition of type-specific linguistic information has on the performance of these methods.
Theme
Computerlinguistik

Similar documents (content)

  1. Nagy T., I.: Detecting multiword expressions and named entities in natural language texts (2014) 0.40
    0.39792925 = sum of:
      0.39792925 = product of:
        1.243529 = sum of:
          0.023273801 = weight(abstract_txt:linguistic in 2536) [ClassicSimilarity], result of:
            0.023273801 = score(doc=2536,freq=1.0), product of:
              0.10250325 = queryWeight, product of:
                1.0791814 = boost
                5.8125896 = idf(docFreq=360, maxDocs=44421)
                0.016340807 = queryNorm
              0.22705428 = fieldWeight in 2536, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.8125896 = idf(docFreq=360, maxDocs=44421)
                0.0390625 = fieldNorm(doc=2536)
          0.04873359 = weight(abstract_txt:extraction in 2536) [ClassicSimilarity], result of:
            0.04873359 = score(doc=2536,freq=3.0), product of:
              0.116324514 = queryWeight, product of:
                1.1496384 = boost
                6.192079 = idf(docFreq=246, maxDocs=44421)
                0.016340807 = queryNorm
              0.41894513 = fieldWeight in 2536, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                6.192079 = idf(docFreq=246, maxDocs=44421)
                0.0390625 = fieldNorm(doc=2536)
          0.010500803 = weight(abstract_txt:their in 2536) [ClassicSimilarity], result of:
            0.010500803 = score(doc=2536,freq=2.0), product of:
              0.06029867 = queryWeight, product of:
                1.1705623 = boost
                3.1523883 = idf(docFreq=5161, maxDocs=44421)
                0.016340807 = queryNorm
              0.1741465 = fieldWeight in 2536, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                3.1523883 = idf(docFreq=5161, maxDocs=44421)
                0.0390625 = fieldNorm(doc=2536)
          0.040445577 = weight(abstract_txt:statistical in 2536) [ClassicSimilarity], result of:
            0.040445577 = score(doc=2536,freq=1.0), product of:
              0.18667382 = queryWeight, product of:
                2.0595975 = boost
                5.5466094 = idf(docFreq=470, maxDocs=44421)
                0.016340807 = queryNorm
              0.21666443 = fieldWeight in 2536, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5466094 = idf(docFreq=470, maxDocs=44421)
                0.0390625 = fieldNorm(doc=2536)
          0.03576745 = weight(abstract_txt:methods in 2536) [ClassicSimilarity], result of:
            0.03576745 = score(doc=2536,freq=2.0), product of:
              0.15626025 = queryWeight, product of:
                2.3078656 = boost
                4.1434727 = idf(docFreq=1915, maxDocs=44421)
                0.016340807 = queryNorm
              0.22889668 = fieldWeight in 2536, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.1434727 = idf(docFreq=1915, maxDocs=44421)
                0.0390625 = fieldNorm(doc=2536)
          0.20907784 = weight(abstract_txt:expressions in 2536) [ClassicSimilarity], result of:
            0.20907784 = score(doc=2536,freq=8.0), product of:
              0.27904746 = queryWeight, product of:
                2.518139 = boost
                6.7814865 = idf(docFreq=136, maxDocs=44421)
                0.016340807 = queryNorm
              0.7492555 = fieldWeight in 2536, product of:
                2.828427 = tf(freq=8.0), with freq of:
                  8.0 = termFreq=8.0
                6.7814865 = idf(docFreq=136, maxDocs=44421)
                0.0390625 = fieldNorm(doc=2536)
          0.51001 = weight(abstract_txt:multiword in 2536) [ClassicSimilarity], result of:
            0.51001 = score(doc=2536,freq=11.0), product of:
              0.4547342 = queryWeight, product of:
                3.2145445 = boost
                8.656945 = idf(docFreq=20, maxDocs=44421)
                0.016340807 = queryNorm
              1.1215563 = fieldWeight in 2536, product of:
                3.3166249 = tf(freq=11.0), with freq of:
                  11.0 = termFreq=11.0
                8.656945 = idf(docFreq=20, maxDocs=44421)
                0.0390625 = fieldNorm(doc=2536)
          0.36571988 = weight(abstract_txt:mwes in 2536) [ClassicSimilarity], result of:
            0.36571988 = score(doc=2536,freq=3.0), product of:
              0.56177366 = queryWeight, product of:
                3.5729039 = boost
                9.622026 = idf(docFreq=7, maxDocs=44421)
                0.016340807 = queryNorm
              0.6510093 = fieldWeight in 2536, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                9.622026 = idf(docFreq=7, maxDocs=44421)
                0.0390625 = fieldNorm(doc=2536)
        0.32 = coord(8/25)
    
  2. Cruys, T. van de; Moirón, B.V.: Semantics-based multiword expression extraction (2007) 0.36
    0.3580655 = sum of:
      0.3580655 = product of:
        1.4919395 = sum of:
          0.06421576 = weight(abstract_txt:measures in 3919) [ClassicSimilarity], result of:
            0.06421576 = score(doc=3919,freq=2.0), product of:
              0.08928318 = queryWeight, product of:
                1.0071878 = boost
                5.424824 = idf(docFreq=531, maxDocs=44421)
                0.016340807 = queryNorm
              0.71923685 = fieldWeight in 3919, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.424824 = idf(docFreq=531, maxDocs=44421)
                0.09375 = fieldNorm(doc=3919)
          0.06752724 = weight(abstract_txt:extraction in 3919) [ClassicSimilarity], result of:
            0.06752724 = score(doc=3919,freq=1.0), product of:
              0.116324514 = queryWeight, product of:
                1.1496384 = boost
                6.192079 = idf(docFreq=246, maxDocs=44421)
                0.016340807 = queryNorm
              0.5805074 = fieldWeight in 3919, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.192079 = idf(docFreq=246, maxDocs=44421)
                0.09375 = fieldNorm(doc=3919)
          0.09706938 = weight(abstract_txt:statistical in 3919) [ClassicSimilarity], result of:
            0.09706938 = score(doc=3919,freq=1.0), product of:
              0.18667382 = queryWeight, product of:
                2.0595975 = boost
                5.5466094 = idf(docFreq=470, maxDocs=44421)
                0.016340807 = queryNorm
              0.5199946 = fieldWeight in 3919, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5466094 = idf(docFreq=470, maxDocs=44421)
                0.09375 = fieldNorm(doc=3919)
          0.17740843 = weight(abstract_txt:expressions in 3919) [ClassicSimilarity], result of:
            0.17740843 = score(doc=3919,freq=1.0), product of:
              0.27904746 = queryWeight, product of:
                2.518139 = boost
                6.7814865 = idf(docFreq=136, maxDocs=44421)
                0.016340807 = queryNorm
              0.63576436 = fieldWeight in 3919, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.7814865 = idf(docFreq=136, maxDocs=44421)
                0.09375 = fieldNorm(doc=3919)
          0.36905712 = weight(abstract_txt:multiword in 3919) [ClassicSimilarity], result of:
            0.36905712 = score(doc=3919,freq=1.0), product of:
              0.4547342 = queryWeight, product of:
                3.2145445 = boost
                8.656945 = idf(docFreq=20, maxDocs=44421)
                0.016340807 = queryNorm
              0.81158864 = fieldWeight in 3919, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.656945 = idf(docFreq=20, maxDocs=44421)
                0.09375 = fieldNorm(doc=3919)
          0.7166617 = weight(abstract_txt:mwes in 3919) [ClassicSimilarity], result of:
            0.7166617 = score(doc=3919,freq=2.0), product of:
              0.56177366 = queryWeight, product of:
                3.5729039 = boost
                9.622026 = idf(docFreq=7, maxDocs=44421)
                0.016340807 = queryNorm
              1.2757125 = fieldWeight in 3919, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                9.622026 = idf(docFreq=7, maxDocs=44421)
                0.09375 = fieldNorm(doc=3919)
        0.24 = coord(6/25)
    
  3. Ramisch, C.: Multiword expressions acquisition : a generic and open framework (2015) 0.30
    0.2983144 = sum of:
      0.2983144 = product of:
        1.0654086 = sum of:
          0.037238084 = weight(abstract_txt:linguistic in 2649) [ClassicSimilarity], result of:
            0.037238084 = score(doc=2649,freq=1.0), product of:
              0.10250325 = queryWeight, product of:
                1.0791814 = boost
                5.8125896 = idf(docFreq=360, maxDocs=44421)
                0.016340807 = queryNorm
              0.36328685 = fieldWeight in 2649, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.8125896 = idf(docFreq=360, maxDocs=44421)
                0.0625 = fieldNorm(doc=2649)
          0.065080866 = weight(abstract_txt:acquisition in 2649) [ClassicSimilarity], result of:
            0.065080866 = score(doc=2649,freq=2.0), product of:
              0.11804248 = queryWeight, product of:
                1.1580967 = boost
                6.2376356 = idf(docFreq=235, maxDocs=44421)
                0.016340807 = queryNorm
              0.5513343 = fieldWeight in 2649, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.2376356 = idf(docFreq=235, maxDocs=44421)
                0.0625 = fieldNorm(doc=2649)
          0.067194365 = weight(abstract_txt:regardless in 2649) [ClassicSimilarity], result of:
            0.067194365 = score(doc=2649,freq=1.0), product of:
              0.15192689 = queryWeight, product of:
                1.3138415 = boost
                7.0764947 = idf(docFreq=101, maxDocs=44421)
                0.016340807 = queryNorm
              0.44228092 = fieldWeight in 2649, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.0764947 = idf(docFreq=101, maxDocs=44421)
                0.0625 = fieldNorm(doc=2649)
          0.03409065 = weight(abstract_txt:evaluation in 2649) [ClassicSimilarity], result of:
            0.03409065 = score(doc=2649,freq=1.0), product of:
              0.12176233 = queryWeight, product of:
                1.6634016 = boost
                4.479632 = idf(docFreq=1368, maxDocs=44421)
                0.016340807 = queryNorm
              0.279977 = fieldWeight in 2649, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.479632 = idf(docFreq=1368, maxDocs=44421)
                0.0625 = fieldNorm(doc=2649)
          0.04718181 = weight(abstract_txt:type in 2649) [ClassicSimilarity], result of:
            0.04718181 = score(doc=2649,freq=1.0), product of:
              0.15121882 = queryWeight, product of:
                1.8537176 = boost
                4.992163 = idf(docFreq=819, maxDocs=44421)
                0.016340807 = queryNorm
              0.3120102 = fieldWeight in 2649, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.992163 = idf(docFreq=819, maxDocs=44421)
                0.0625 = fieldNorm(doc=2649)
          0.26446488 = weight(abstract_txt:expressions in 2649) [ClassicSimilarity], result of:
            0.26446488 = score(doc=2649,freq=5.0), product of:
              0.27904746 = queryWeight, product of:
                2.518139 = boost
                6.7814865 = idf(docFreq=136, maxDocs=44421)
                0.016340807 = queryNorm
              0.94774157 = fieldWeight in 2649, product of:
                2.236068 = tf(freq=5.0), with freq of:
                  5.0 = termFreq=5.0
                6.7814865 = idf(docFreq=136, maxDocs=44421)
                0.0625 = fieldNorm(doc=2649)
          0.5501579 = weight(abstract_txt:multiword in 2649) [ClassicSimilarity], result of:
            0.5501579 = score(doc=2649,freq=5.0), product of:
              0.4547342 = queryWeight, product of:
                3.2145445 = boost
                8.656945 = idf(docFreq=20, maxDocs=44421)
                0.016340807 = queryNorm
              1.209845 = fieldWeight in 2649, product of:
                2.236068 = tf(freq=5.0), with freq of:
                  5.0 = termFreq=5.0
                8.656945 = idf(docFreq=20, maxDocs=44421)
                0.0625 = fieldNorm(doc=2649)
        0.28 = coord(7/25)
    
  4. Nissim, M.; Zaninello, A.: Modeling the internal variability of multiword expressions through a pattern-based method (2013) 0.27
    0.27499261 = sum of:
      0.27499261 = product of:
        1.1458026 = sum of:
          0.0302716 = weight(abstract_txt:measures in 1990) [ClassicSimilarity], result of:
            0.0302716 = score(doc=1990,freq=1.0), product of:
              0.08928318 = queryWeight, product of:
                1.0071878 = boost
                5.424824 = idf(docFreq=531, maxDocs=44421)
                0.016340807 = queryNorm
              0.3390515 = fieldWeight in 1990, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.424824 = idf(docFreq=531, maxDocs=44421)
                0.0625 = fieldNorm(doc=1990)
          0.06366529 = weight(abstract_txt:extraction in 1990) [ClassicSimilarity], result of:
            0.06366529 = score(doc=1990,freq=2.0), product of:
              0.116324514 = queryWeight, product of:
                1.1496384 = boost
                6.192079 = idf(docFreq=246, maxDocs=44421)
                0.016340807 = queryNorm
              0.5473076 = fieldWeight in 1990, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.192079 = idf(docFreq=246, maxDocs=44421)
                0.0625 = fieldNorm(doc=1990)
          0.011880302 = weight(abstract_txt:their in 1990) [ClassicSimilarity], result of:
            0.011880302 = score(doc=1990,freq=1.0), product of:
              0.06029867 = queryWeight, product of:
                1.1705623 = boost
                3.1523883 = idf(docFreq=5161, maxDocs=44421)
                0.016340807 = queryNorm
              0.19702427 = fieldWeight in 1990, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.1523883 = idf(docFreq=5161, maxDocs=44421)
                0.0625 = fieldNorm(doc=1990)
          0.11827229 = weight(abstract_txt:expressions in 1990) [ClassicSimilarity], result of:
            0.11827229 = score(doc=1990,freq=1.0), product of:
              0.27904746 = queryWeight, product of:
                2.518139 = boost
                6.7814865 = idf(docFreq=136, maxDocs=44421)
                0.016340807 = queryNorm
              0.4238429 = fieldWeight in 1990, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.7814865 = idf(docFreq=136, maxDocs=44421)
                0.0625 = fieldNorm(doc=1990)
          0.24603806 = weight(abstract_txt:multiword in 1990) [ClassicSimilarity], result of:
            0.24603806 = score(doc=1990,freq=1.0), product of:
              0.4547342 = queryWeight, product of:
                3.2145445 = boost
                8.656945 = idf(docFreq=20, maxDocs=44421)
                0.016340807 = queryNorm
              0.5410591 = fieldWeight in 1990, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.656945 = idf(docFreq=20, maxDocs=44421)
                0.0625 = fieldNorm(doc=1990)
          0.6756751 = weight(abstract_txt:mwes in 1990) [ClassicSimilarity], result of:
            0.6756751 = score(doc=1990,freq=4.0), product of:
              0.56177366 = queryWeight, product of:
                3.5729039 = boost
                9.622026 = idf(docFreq=7, maxDocs=44421)
                0.016340807 = queryNorm
              1.2027533 = fieldWeight in 1990, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                9.622026 = idf(docFreq=7, maxDocs=44421)
                0.0625 = fieldNorm(doc=1990)
        0.24 = coord(6/25)
    
  5. Snajder, J.; Almic, P.: Modeling semantic compositionality of Croatian multiword expressions (2015) 0.23
    0.23072219 = sum of:
      0.23072219 = product of:
        1.4420137 = sum of:
          0.017820451 = weight(abstract_txt:their in 3920) [ClassicSimilarity], result of:
            0.017820451 = score(doc=3920,freq=1.0), product of:
              0.06029867 = queryWeight, product of:
                1.1705623 = boost
                3.1523883 = idf(docFreq=5161, maxDocs=44421)
                0.016340807 = queryNorm
              0.2955364 = fieldWeight in 3920, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.1523883 = idf(docFreq=5161, maxDocs=44421)
                0.09375 = fieldNorm(doc=3920)
          0.17740843 = weight(abstract_txt:expressions in 3920) [ClassicSimilarity], result of:
            0.17740843 = score(doc=3920,freq=1.0), product of:
              0.27904746 = queryWeight, product of:
                2.518139 = boost
                6.7814865 = idf(docFreq=136, maxDocs=44421)
                0.016340807 = queryNorm
              0.63576436 = fieldWeight in 3920, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.7814865 = idf(docFreq=136, maxDocs=44421)
                0.09375 = fieldNorm(doc=3920)
          0.36905712 = weight(abstract_txt:multiword in 3920) [ClassicSimilarity], result of:
            0.36905712 = score(doc=3920,freq=1.0), product of:
              0.4547342 = queryWeight, product of:
                3.2145445 = boost
                8.656945 = idf(docFreq=20, maxDocs=44421)
                0.016340807 = queryNorm
              0.81158864 = fieldWeight in 3920, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.656945 = idf(docFreq=20, maxDocs=44421)
                0.09375 = fieldNorm(doc=3920)
          0.8777277 = weight(abstract_txt:mwes in 3920) [ClassicSimilarity], result of:
            0.8777277 = score(doc=3920,freq=3.0), product of:
              0.56177366 = queryWeight, product of:
                3.5729039 = boost
                9.622026 = idf(docFreq=7, maxDocs=44421)
                0.016340807 = queryNorm
              1.5624223 = fieldWeight in 3920, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                9.622026 = idf(docFreq=7, maxDocs=44421)
                0.09375 = fieldNorm(doc=3920)
        0.16 = coord(4/25)