Document (#16600)

Author
Huffman, S.
Title
Acquaintance : language-independent document categorization by n-grams
Source
The Fourth Text Retrieval Conference (TREC-4). Ed.: D.K. Harman
Imprint
Gaithersburg, MD : National Institute of Standards and Technology
Year
1996
Pages
S.359-371
Series
NIST special publication; 500-236
Object
TREC

Similar documents (content)

  1. Khoo, C.S.G.; Dai, D.; Loh, T.E.: Using statistical and contextual information to identify two- and three-character words in Chinese text (2002) 0.62
    0.61873055 = sum of:
      0.61873055 = product of:
        1.0312176 = sum of:
          0.07729129 = weight(abstract_txt:document in 206) [ClassicSimilarity], result of:
            0.07729129 = score(doc=206,freq=2.0), product of:
              0.20363776 = queryWeight, product of:
                1.0295284 = boost
                4.29415 = idf(docFreq=1647, maxDocs=44421)
                0.046062004 = queryNorm
              0.3795528 = fieldWeight in 206, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.29415 = idf(docFreq=1647, maxDocs=44421)
                0.0625 = fieldNorm(doc=206)
          0.19142018 = weight(abstract_txt:independent in 206) [ClassicSimilarity], result of:
            0.19142018 = score(doc=206,freq=2.0), product of:
              0.3727604 = queryWeight, product of:
                1.3929131 = boost
                5.8098235 = idf(docFreq=361, maxDocs=44421)
                0.046062004 = queryNorm
              0.5135207 = fieldWeight in 206, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.8098235 = idf(docFreq=361, maxDocs=44421)
                0.0625 = fieldNorm(doc=206)
          0.7625061 = weight(abstract_txt:grams in 206) [ClassicSimilarity], result of:
            0.7625061 = score(doc=206,freq=4.0), product of:
              0.74345875 = queryWeight, product of:
                1.9671503 = boost
                8.20496 = idf(docFreq=32, maxDocs=44421)
                0.046062004 = queryNorm
              1.02562 = fieldWeight in 206, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                8.20496 = idf(docFreq=32, maxDocs=44421)
                0.0625 = fieldNorm(doc=206)
        0.6 = coord(3/5)
    
  2. Juola, P.; Mikros, G.K.; Vinsick, S.: ¬A comparative assessment of the difficulty of authorship attribution in Greek and in English (2019) 0.51
    0.5136724 = sum of:
      0.5136724 = product of:
        0.6420905 = sum of:
          0.07082974 = weight(abstract_txt:language in 676) [ClassicSimilarity], result of:
            0.07082974 = score(doc=676,freq=2.0), product of:
              0.19212405 = queryWeight, product of:
                4.1709876 = idf(docFreq=1863, maxDocs=44421)
                0.046062004 = queryNorm
              0.3686667 = fieldWeight in 676, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.1709876 = idf(docFreq=1863, maxDocs=44421)
                0.0625 = fieldNorm(doc=676)
          0.054653194 = weight(abstract_txt:document in 676) [ClassicSimilarity], result of:
            0.054653194 = score(doc=676,freq=1.0), product of:
              0.20363776 = queryWeight, product of:
                1.0295284 = boost
                4.29415 = idf(docFreq=1647, maxDocs=44421)
                0.046062004 = queryNorm
              0.26838437 = fieldWeight in 676, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.29415 = idf(docFreq=1647, maxDocs=44421)
                0.0625 = fieldNorm(doc=676)
          0.1353545 = weight(abstract_txt:independent in 676) [ClassicSimilarity], result of:
            0.1353545 = score(doc=676,freq=1.0), product of:
              0.3727604 = queryWeight, product of:
                1.3929131 = boost
                5.8098235 = idf(docFreq=361, maxDocs=44421)
                0.046062004 = queryNorm
              0.36311397 = fieldWeight in 676, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.8098235 = idf(docFreq=361, maxDocs=44421)
                0.0625 = fieldNorm(doc=676)
          0.38125306 = weight(abstract_txt:grams in 676) [ClassicSimilarity], result of:
            0.38125306 = score(doc=676,freq=1.0), product of:
              0.74345875 = queryWeight, product of:
                1.9671503 = boost
                8.20496 = idf(docFreq=32, maxDocs=44421)
                0.046062004 = queryNorm
              0.51281 = fieldWeight in 676, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.20496 = idf(docFreq=32, maxDocs=44421)
                0.0625 = fieldNorm(doc=676)
        0.8 = coord(4/5)
    
  3. Robertson, A.M.; Willett, P.: Applications of n-grams in textual information systems (1998) 0.41
    0.4124802 = sum of:
      0.4124802 = product of:
        1.0312005 = sum of:
          0.087647334 = weight(abstract_txt:language in 5715) [ClassicSimilarity], result of:
            0.087647334 = score(doc=5715,freq=1.0), product of:
              0.19212405 = queryWeight, product of:
                4.1709876 = idf(docFreq=1863, maxDocs=44421)
                0.046062004 = queryNorm
              0.45620176 = fieldWeight in 5715, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.1709876 = idf(docFreq=1863, maxDocs=44421)
                0.109375 = fieldNorm(doc=5715)
          0.9435532 = weight(abstract_txt:grams in 5715) [ClassicSimilarity], result of:
            0.9435532 = score(doc=5715,freq=2.0), product of:
              0.74345875 = queryWeight, product of:
                1.9671503 = boost
                8.20496 = idf(docFreq=32, maxDocs=44421)
                0.046062004 = queryNorm
              1.26914 = fieldWeight in 5715, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                8.20496 = idf(docFreq=32, maxDocs=44421)
                0.109375 = fieldNorm(doc=5715)
        0.4 = coord(2/5)
    
  4. Pearce, C.; Nicholas, C.: TELLTALE: Experiments in a dynamic hypertext environment for degraded and multilingual data (1996) 0.36
    0.36449283 = sum of:
      0.36449283 = product of:
        0.60748804 = sum of:
          0.06260524 = weight(abstract_txt:language in 4139) [ClassicSimilarity], result of:
            0.06260524 = score(doc=4139,freq=1.0), product of:
              0.19212405 = queryWeight, product of:
                4.1709876 = idf(docFreq=1863, maxDocs=44421)
                0.046062004 = queryNorm
              0.3258584 = fieldWeight in 4139, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.1709876 = idf(docFreq=1863, maxDocs=44421)
                0.078125 = fieldNorm(doc=4139)
          0.06831649 = weight(abstract_txt:document in 4139) [ClassicSimilarity], result of:
            0.06831649 = score(doc=4139,freq=1.0), product of:
              0.20363776 = queryWeight, product of:
                1.0295284 = boost
                4.29415 = idf(docFreq=1647, maxDocs=44421)
                0.046062004 = queryNorm
              0.33548045 = fieldWeight in 4139, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.29415 = idf(docFreq=1647, maxDocs=44421)
                0.078125 = fieldNorm(doc=4139)
          0.47656634 = weight(abstract_txt:grams in 4139) [ClassicSimilarity], result of:
            0.47656634 = score(doc=4139,freq=1.0), product of:
              0.74345875 = queryWeight, product of:
                1.9671503 = boost
                8.20496 = idf(docFreq=32, maxDocs=44421)
                0.046062004 = queryNorm
              0.6410125 = fieldWeight in 4139, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.20496 = idf(docFreq=32, maxDocs=44421)
                0.078125 = fieldNorm(doc=4139)
        0.6 = coord(3/5)
    
  5. Ahmed, F.; Nürnberger, A.: Evaluation of n-gram conflation approaches for Arabic text retrieval (2009) 0.36
    0.36201358 = sum of:
      0.36201358 = product of:
        0.60335594 = sum of:
          0.08674836 = weight(abstract_txt:language in 3941) [ClassicSimilarity], result of:
            0.08674836 = score(doc=3941,freq=3.0), product of:
              0.19212405 = queryWeight, product of:
                4.1709876 = idf(docFreq=1863, maxDocs=44421)
                0.046062004 = queryNorm
              0.45152265 = fieldWeight in 3941, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                4.1709876 = idf(docFreq=1863, maxDocs=44421)
                0.0625 = fieldNorm(doc=3941)
          0.1353545 = weight(abstract_txt:independent in 3941) [ClassicSimilarity], result of:
            0.1353545 = score(doc=3941,freq=1.0), product of:
              0.3727604 = queryWeight, product of:
                1.3929131 = boost
                5.8098235 = idf(docFreq=361, maxDocs=44421)
                0.046062004 = queryNorm
              0.36311397 = fieldWeight in 3941, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.8098235 = idf(docFreq=361, maxDocs=44421)
                0.0625 = fieldNorm(doc=3941)
          0.38125306 = weight(abstract_txt:grams in 3941) [ClassicSimilarity], result of:
            0.38125306 = score(doc=3941,freq=1.0), product of:
              0.74345875 = queryWeight, product of:
                1.9671503 = boost
                8.20496 = idf(docFreq=32, maxDocs=44421)
                0.046062004 = queryNorm
              0.51281 = fieldWeight in 3941, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.20496 = idf(docFreq=32, maxDocs=44421)
                0.0625 = fieldNorm(doc=3941)
        0.6 = coord(3/5)