2024-02-19 15:16:35 +04:00
package packaging
import (
"bufio"
2025-10-14 11:13:48 +06:00
"context"
2024-02-19 15:16:35 +04:00
"errors"
"io"
"net/textproto"
"strings"
2024-12-25 12:17:08 +06:00
"sync"
2024-02-19 15:16:35 +04:00
"golang.org/x/xerrors"
2024-05-07 16:25:52 +04:00
ftypes "github.com/aquasecurity/trivy/pkg/fanal/types"
2024-06-14 13:36:47 +06:00
"github.com/aquasecurity/trivy/pkg/licensing"
2024-02-26 09:55:15 +04:00
"github.com/aquasecurity/trivy/pkg/log"
xio "github.com/aquasecurity/trivy/pkg/x/io"
2024-02-19 15:16:35 +04:00
)
2024-12-25 12:17:08 +06:00
// licenseMetadataInfoOnce guards the info-level notice about METADATA
// licenses so that it is logged at most once per process, regardless of how
// many packages are parsed.
var licenseMetadataInfoOnce sync.Once
2024-04-11 22:59:09 +04:00
// Parser parses Python package metadata files.
type Parser struct {
	// logger receives the debug and info messages emitted while parsing.
	logger *log.Logger
}
2024-02-19 15:16:35 +04:00
2024-05-07 16:25:52 +04:00
func NewParser ( ) * Parser {
2024-04-11 22:59:09 +04:00
return & Parser {
logger : log . WithPrefix ( "python" ) ,
}
2024-02-19 15:16:35 +04:00
}
// Parse parses egg and wheel metadata.
// e.g. .egg-info/PKG-INFO and dist-info/METADATA
2025-10-14 11:13:48 +06:00
func ( p * Parser ) Parse ( _ context . Context , r xio . ReadSeekerAt ) ( [ ] ftypes . Package , [ ] ftypes . Dependency , error ) {
2024-02-19 15:16:35 +04:00
rd := textproto . NewReader ( bufio . NewReader ( r ) )
h , err := rd . ReadMIMEHeader ( )
if e := textproto . ProtocolError ( "" ) ; errors . As ( err , & e ) {
// A MIME header may contain bytes in the key or value outside the set allowed by RFC 7230.
// cf. https://cs.opensource.google/go/go/+/a6642e67e16b9d769a0c08e486ba08408064df19
// However, our required key/value could have been correctly parsed,
// so we continue with the subsequent process.
2024-04-11 22:59:09 +04:00
p . logger . Debug ( "MIME protocol error" , log . Err ( err ) )
2024-02-19 15:16:35 +04:00
} else if err != nil && err != io . EOF {
return nil , nil , xerrors . Errorf ( "read MIME error: %w" , err )
}
name , version := h . Get ( "name" ) , h . Get ( "version" )
if name == "" || version == "" {
return nil , nil , xerrors . New ( "name or version is empty" )
}
2024-03-08 07:37:55 +01:00
// "License-Expression" takes precedence in accordance with https://peps.python.org/pep-0639/#deprecate-license-field
// Although keep in mind that pep-0639 is still in draft.
2024-02-19 15:16:35 +04:00
var license string
2024-03-08 07:37:55 +01:00
if le := h . Get ( "License-Expression" ) ; le != "" {
license = le
2024-02-19 15:16:35 +04:00
} else {
2024-03-08 07:37:55 +01:00
// Get possible multiple occurrences of licenses from "Classifier: License" field
// When present it should define the license whereas "License" would define any additional exceptions or modifications
// ref. https://packaging.python.org/en/latest/specifications/core-metadata/#license
var licenses [ ] string
2024-02-19 15:16:35 +04:00
for _ , classifier := range h . Values ( "Classifier" ) {
if strings . HasPrefix ( classifier , "License :: " ) {
values := strings . Split ( classifier , " :: " )
2024-03-08 07:37:55 +01:00
licenseName := values [ len ( values ) - 1 ]
// According to the classifier list https://pypi.org/classifiers/ there is one classifier which seems more like a grouping
// It has no specific license definition (Classifier: License :: OSI Approved) - it is skipped
if licenseName != "OSI Approved" {
licenses = append ( licenses , licenseName )
}
2024-02-19 15:16:35 +04:00
}
}
2024-03-08 07:37:55 +01:00
license = strings . Join ( licenses , ", " )
if l := h . Get ( "License" ) ; l != "" {
if len ( licenses ) != 0 {
2024-12-25 12:17:08 +06:00
licenseMetadataInfoOnce . Do ( func ( ) {
p . logger . Info ( "Licenses acquired from one or more METADATA files may be subject to additional terms. Use `--debug` flag to see all affected packages." )
} )
p . logger . Debug ( "License acquired from METADATA classifiers may be subject to additional terms" ,
2024-04-11 22:59:09 +04:00
log . String ( "name" , name ) , log . String ( "version" , version ) )
2024-03-08 07:37:55 +01:00
} else {
license = l
}
}
2024-02-19 15:16:35 +04:00
}
2024-03-08 07:37:55 +01:00
2024-02-19 15:16:35 +04:00
if license == "" && h . Get ( "License-File" ) != "" {
2024-09-05 16:20:29 +06:00
license = licensing . LicenseFilePrefix + h . Get ( "License-File" )
2024-02-19 15:16:35 +04:00
}
2024-05-07 16:25:52 +04:00
return [ ] ftypes . Package {
2024-02-19 15:16:35 +04:00
{
2024-05-07 16:25:52 +04:00
Name : name ,
Version : version ,
2024-06-14 13:36:47 +06:00
Licenses : licensing . SplitLicenses ( license ) ,
2024-02-19 15:16:35 +04:00
} ,
} , nil , nil
}