Commit 20b3faec authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch '330929-persist-npm-package-metadata' into 'master'

Add support for NPM package metadata

See merge request gitlab-org/gitlab!73639
parents 7844282c f152d1c9
......@@ -9,5 +9,9 @@ module Packages
package_name.match(Gitlab::Regex.npm_package_name_regex)&.captures&.first
end
def self.table_name_prefix
'packages_npm_'
end
end
end
# frozen_string_literal: true
class Packages::Npm::Metadatum < ApplicationRecord
belongs_to :package, -> { where(package_type: :npm) }, inverse_of: :npm_metadatum
validates :package, presence: true
# From https://github.com/npm/registry/blob/master/docs/responses/package-metadata.md#abbreviated-version-object
validates :package_json, json_schema: { filename: "npm_package_json" }
validate :ensure_npm_package_type
validate :ensure_package_json_size
private
def ensure_npm_package_type
return if package&.npm?
errors.add(:base, _('Package type must be NPM'))
end
def ensure_package_json_size
return if package_json.to_s.size < 20000
errors.add(:package_json, _('structure is too large'))
end
end
......@@ -39,6 +39,7 @@ class Packages::Package < ApplicationRecord
has_one :nuget_metadatum, inverse_of: :package, class_name: 'Packages::Nuget::Metadatum'
has_one :composer_metadatum, inverse_of: :package, class_name: 'Packages::Composer::Metadatum'
has_one :rubygems_metadatum, inverse_of: :package, class_name: 'Packages::Rubygems::Metadatum'
has_one :npm_metadatum, inverse_of: :package, class_name: 'Packages::Npm::Metadatum'
has_many :build_infos, inverse_of: :package
has_many :pipelines, through: :build_infos, disable_joins: -> { disable_cross_joins_to_pipelines? }
has_one :debian_publication, inverse_of: :package, class_name: 'Packages::Debian::Publication'
......@@ -126,6 +127,7 @@ class Packages::Package < ApplicationRecord
.where(Packages::Composer::Metadatum.table_name => { target_sha: target })
end
scope :preload_composer, -> { preload(:composer_metadatum) }
scope :preload_npm_metadatum, -> { preload(:npm_metadatum) }
scope :without_nuget_temporary_name, -> { where.not(name: Packages::Nuget::TEMPORARY_PACKAGE_NAME) }
......
......@@ -5,26 +5,37 @@ module Packages
class PackagePresenter
include API::Helpers::RelatedResourcesHelpers
# Allowed fields are those defined in the abbreviated form
# defined here: https://github.com/npm/registry/blob/master/docs/responses/package-metadata.md#abbreviated-version-object
# except: name, version, dist, dependencies and xDependencies. Those are generated by this presenter.
PACKAGE_JSON_ALLOWED_FIELDS = %w[deprecated bin directories dist engines _hasShrinkwrap].freeze
attr_reader :name, :packages
def initialize(name, packages)
def initialize(name, packages, include_metadata: false)
@name = name
@packages = packages
@include_metadata = include_metadata
end
def versions
package_versions = {}
packages.each_batch do |relation|
relation.including_dependency_links
.preload_files
.each do |package|
package_file = package.package_files.last
batched_packages = relation.including_dependency_links
.preload_files
if @include_metadata
batched_packages = batched_packages.preload_npm_metadatum
end
batched_packages.each do |package|
package_file = package.package_files.last
next unless package_file
next unless package_file
package_versions[package.version] = build_package_version(package, package_file)
end
package_versions[package.version] = build_package_version(package, package_file)
end
end
package_versions
......@@ -41,14 +52,14 @@ module Packages
end
def build_package_version(package, package_file)
{
abbreviated_package_json(package).merge(
name: package.name,
version: package.version,
dist: {
shasum: package_file.file_sha1,
tarball: tarball_url(package, package_file)
}
}.tap do |package_version|
).tap do |package_version|
package_version.merge!(build_package_dependencies(package))
end
end
......@@ -79,6 +90,13 @@ module Packages
Packages::Tag.for_packages(packages)
.preload_package
end
def abbreviated_package_json(package)
return {} unless @include_metadata
json = package.npm_metadatum&.package_json || {}
json.slice(*PACKAGE_JSON_ALLOWED_FIELDS)
end
end
end
end
......@@ -21,6 +21,10 @@ module Packages
::Packages::CreateDependencyService.new(package, package_dependencies).execute
::Packages::Npm::CreateTagService.new(package, dist_tag).execute
if Feature.enabled?(:packages_npm_abbreviated_metadata, project)
package.create_npm_metadatum!(package_json: version_data)
end
package
end
......
{
"description": "NPM package json metadata",
"type": "object",
"properties": {
"name": { "type": "string" },
"version": { "type": "string" },
"dist": {
"type": "object",
"properties": {
"tarball": { "type": "string" },
"shasum": { "type": "string" }
},
"additionalProperties": true,
"required": [
"tarball",
"shasum"
]
}
},
"additionalProperties": true,
"required": [
"name",
"version",
"dist"
]
}
---
name: packages_npm_abbreviated_metadata
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73639
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/344827
milestone: '14.5'
type: development
group: group::package
default_enabled: false
# frozen_string_literal: true
class CreatePackagesNpmMetadata < Gitlab::Database::Migration[1.0]
disable_ddl_transaction!
def up
with_lock_retries do
create_table :packages_npm_metadata, id: false do |t|
t.references :package, primary_key: true, default: nil, index: false, foreign_key: { to_table: :packages_packages, on_delete: :cascade }, type: :bigint
t.jsonb :package_json, default: {}, null: false
t.check_constraint 'char_length(package_json::text) < 20000'
end
end
end
def down
with_lock_retries do
drop_table :packages_npm_metadata
end
end
end
50a5c8af2cde1ae79d627f70d3b266488f76f76b481aefca8516db5360cfa843
\ No newline at end of file
......@@ -17199,6 +17199,12 @@ CREATE SEQUENCE packages_maven_metadata_id_seq
ALTER SEQUENCE packages_maven_metadata_id_seq OWNED BY packages_maven_metadata.id;
CREATE TABLE packages_npm_metadata (
package_id bigint NOT NULL,
package_json jsonb DEFAULT '{}'::jsonb NOT NULL,
CONSTRAINT chk_rails_e5cbc301ae CHECK ((char_length((package_json)::text) < 20000))
);
CREATE TABLE packages_nuget_dependency_link_metadata (
dependency_link_id bigint NOT NULL,
target_framework text NOT NULL,
......@@ -23524,6 +23530,9 @@ ALTER TABLE ONLY packages_helm_file_metadata
ALTER TABLE ONLY packages_maven_metadata
ADD CONSTRAINT packages_maven_metadata_pkey PRIMARY KEY (id);
ALTER TABLE ONLY packages_npm_metadata
ADD CONSTRAINT packages_npm_metadata_pkey PRIMARY KEY (package_id);
ALTER TABLE ONLY packages_nuget_dependency_link_metadata
ADD CONSTRAINT packages_nuget_dependency_link_metadata_pkey PRIMARY KEY (dependency_link_id);
......@@ -30779,6 +30788,9 @@ ALTER TABLE ONLY atlassian_identities
ALTER TABLE ONLY serverless_domain_cluster
ADD CONSTRAINT fk_rails_c09009dee1 FOREIGN KEY (pages_domain_id) REFERENCES pages_domains(id) ON DELETE CASCADE;
ALTER TABLE ONLY packages_npm_metadata
ADD CONSTRAINT fk_rails_c0e5fce6f3 FOREIGN KEY (package_id) REFERENCES packages_packages(id) ON DELETE CASCADE;
ALTER TABLE ONLY labels
ADD CONSTRAINT fk_rails_c1ac5161d8 FOREIGN KEY (group_id) REFERENCES namespaces(id) ON DELETE CASCADE;
......@@ -363,6 +363,10 @@ This rule has a different impact depending on the package name:
This aligns with npmjs.org's behavior. However, npmjs.org does not ever let you publish
the same version more than once, even if it has been deleted.
## `package.json` limitations
You can't publish a package if its `package.json` file exceeds 20,000 characters.
## Install a package
npm packages are commonly-installed by using the `npm` or `yarn` commands
......@@ -427,22 +431,29 @@ and use your organization's URL. The name is case-sensitive and must match the n
//gitlab.example.com/api/v4/projects/<your_project_id>/packages/npm/:_authToken= "<your_token>"
```
### npm dependencies metadata
### npm metadata
> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/11867) in GitLab Premium 12.6.
> - [Moved](https://gitlab.com/gitlab-org/gitlab/-/issues/221259) to GitLab Free in 13.3.
In GitLab 12.6 and later, packages published to the Package Registry expose the following attributes to the npm client:
- name
- version
- dist-tags
- dependencies
- dependencies
- devDependencies
- bundleDependencies
- peerDependencies
- deprecated
> - [Improved](https://gitlab.com/gitlab-org/gitlab/-/issues/330929) in GitLab 14.5.
The GitLab Package Registry exposes the following attributes to the npm client.
These are similar to the [abbreviated metadata format](https://github.com/npm/registry/blob/9e368cf6aaca608da5b2c378c0d53f475298b916/docs/responses/package-metadata.md#abbreviated-metadata-format):
- `name`
- `versions`
- `name`
- `version`
- `deprecated`
- `dependencies`
- `devDependencies`
- `bundleDependencies`
- `peerDependencies`
- `bin`
- `directories`
- `dist`
- `engines`
- `_hasShrinkwrap`
## Add npm distribution tags
......@@ -579,6 +590,10 @@ root namespace and therefore cannot be published again using the same name.
This is also true even if the prior published package shares the same name,
but not the version.
#### Package JSON file is too large
Make sure that your `package.json` file does not [exceed `20,000` characters](#packagejson-limitations).
### `npm publish` returns `npm ERR! 500 Internal Server Error - PUT`
This is a [known issue](https://gitlab.com/gitlab-org/gitlab/-/issues/238950) in GitLab
......
......@@ -121,7 +121,9 @@ module API
not_found!('Packages') if packages.empty?
present ::Packages::Npm::PackagePresenter.new(package_name, packages),
include_metadata = Feature.enabled?(:packages_npm_abbreviated_metadata, project)
present ::Packages::Npm::PackagePresenter.new(package_name, packages, include_metadata: include_metadata),
with: ::API::Entities::NpmPackage
end
end
......
......@@ -355,6 +355,7 @@ packages_dependency_links: :gitlab_main
packages_events: :gitlab_main
packages_helm_file_metadata: :gitlab_main
packages_maven_metadata: :gitlab_main
packages_npm_metadata: :gitlab_main
packages_nuget_dependency_link_metadata: :gitlab_main
packages_nuget_metadata: :gitlab_main
packages_package_file_build_infos: :gitlab_main
......
......@@ -24442,6 +24442,9 @@ msgstr ""
msgid "Package type must be Maven"
msgstr ""
msgid "Package type must be NPM"
msgstr ""
msgid "Package type must be NuGet"
msgstr ""
......@@ -41770,6 +41773,9 @@ msgstr ""
msgid "starts on %{timebox_start_date}"
msgstr ""
msgid "structure is too large"
msgstr ""
msgid "stuck"
msgstr ""
......
# frozen_string_literal: true
FactoryBot.define do
factory :npm_metadatum, class: 'Packages::Npm::Metadatum' do
package { association(:npm_package) }
package_json do
{
'name': package.name,
'version': package.version,
'dist': {
'tarball': 'http://localhost/tarball.tgz',
'shasum': '1234567890'
}
}
end
end
end
......@@ -36,11 +36,11 @@
".{1,}": { "type": "string" }
}
},
"deprecated": {
"type": "object",
"patternProperties": {
".{1,}": { "type": "string" }
}
}
"deprecated": { "type": "string"},
"bin": { "type": "string" },
"directories": { "type": "array" },
"engines": { "type": "object" },
"_hasShrinkwrap": { "type": "boolean" },
"additionalProperties": true
}
}
......@@ -14,7 +14,8 @@
"express":"^4.16.4"
},
"dist":{
"shasum":"f572d396fae9206628714fb2ce00f72e94f2258f"
"shasum":"f572d396fae9206628714fb2ce00f72e94f2258f",
"tarball":"http://localhost/npm/package.tgz"
}
}
},
......
......@@ -28,7 +28,8 @@
"express":"^4.16.4"
},
"dist":{
"shasum":"f572d396fae9206628714fb2ce00f72e94f2258f"
"shasum":"f572d396fae9206628714fb2ce00f72e94f2258f",
"tarball":"http://localhost/npm/package.tgz"
}
}
},
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Packages::Npm::Metadatum, type: :model do
describe 'relationships' do
it { is_expected.to belong_to(:package).inverse_of(:npm_metadatum) }
end
describe 'validations' do
describe 'package', :aggregate_failures do
it { is_expected.to validate_presence_of(:package) }
it 'ensure npm package type' do
metadatum = build(:npm_metadatum)
metadatum.package = build(:nuget_package)
expect(metadatum).not_to be_valid
expect(metadatum.errors).to contain_exactly('Package type must be NPM')
end
end
describe 'package_json', :aggregate_failures do
let(:valid_json) { { 'name' => 'foo', 'version' => 'v1.0', 'dist' => { 'tarball' => 'x', 'shasum' => 'x' } } }
it { is_expected.to allow_value(valid_json).for(:package_json) }
it { is_expected.to allow_value(valid_json.merge('extra-field': { 'foo': 'bar' })).for(:package_json) }
it { is_expected.to allow_value(with_dist { |dist| dist.merge('extra-field': 'x') }).for(:package_json) }
%w[name version dist].each do |field|
it { is_expected.not_to allow_value(valid_json.except(field)).for(:package_json) }
end
%w[tarball shasum].each do |field|
it { is_expected.not_to allow_value(with_dist { |dist| dist.except(field) }).for(:package_json) }
end
it { is_expected.not_to allow_value({}).for(:package_json) }
it { is_expected.not_to allow_value(test: 'test' * 10000).for(:package_json) }
def with_dist
valid_json.tap do |h|
h['dist'] = yield(h['dist'])
end
end
end
end
end
......@@ -20,6 +20,7 @@ RSpec.describe Packages::Package, type: :model do
it { is_expected.to have_one(:debian_distribution).through(:debian_publication).source(:distribution).inverse_of(:packages).class_name('Packages::Debian::ProjectDistribution') }
it { is_expected.to have_one(:nuget_metadatum).inverse_of(:package) }
it { is_expected.to have_one(:rubygems_metadatum).inverse_of(:package) }
it { is_expected.to have_one(:npm_metadatum).inverse_of(:package) }
end
describe '.with_debian_codename' do
......
......@@ -3,6 +3,8 @@
require 'spec_helper'
RSpec.describe ::Packages::Npm::PackagePresenter do
using RSpec::Parameterized::TableSyntax
let_it_be(:project) { create(:project) }
let_it_be(:package_name) { "@#{project.root_namespace.path}/test" }
let_it_be(:package1) { create(:npm_package, version: '2.0.4', project: project, name: package_name) }
......@@ -13,42 +15,88 @@ RSpec.describe ::Packages::Npm::PackagePresenter do
let(:presenter) { described_class.new(package_name, packages) }
describe '#versions' do
subject { presenter.versions }
let_it_be('package_json') do
{
'name': package_name,
'version': '2.0.4',
'deprecated': 'warning!',
'bin': './cli.js',
'directories': ['lib'],
'engines': { 'npm': '^7.5.6' },
'_hasShrinkwrap': false,
'dist': {
'tarball': 'http://localhost/tarball.tgz',
'shasum': '1234567890'
},
'custom_field': 'foo_bar'
}
end
context 'for packages without dependencies' do
it { is_expected.to be_a(Hash) }
it { expect(subject[package1.version].with_indifferent_access).to match_schema('public_api/v4/packages/npm_package_version') }
it { expect(subject[package2.version].with_indifferent_access).to match_schema('public_api/v4/packages/npm_package_version') }
let(:presenter) { described_class.new(package_name, packages, include_metadata: include_metadata) }
::Packages::DependencyLink.dependency_types.keys.each do |dependency_type|
it { expect(subject.dig(package1.version, dependency_type)).to be nil }
it { expect(subject.dig(package2.version, dependency_type)).to be nil }
end
subject { presenter.versions }
it 'avoids N+1 database queries' do
check_n_plus_one(:versions) do
create_list(:npm_package, 5, project: project, name: package_name)
where(:has_dependencies, :has_metadatum, :include_metadata) do
true | true | true
false | true | true
true | false | true
false | false | true
# TODO : to remove along with packages_npm_abbreviated_metadata
# See https://gitlab.com/gitlab-org/gitlab/-/issues/344827
true | true | false
false | true | false
true | false | false
false | false | false
end
with_them do
if params[:has_dependencies]
::Packages::DependencyLink.dependency_types.keys.each do |dependency_type|
let_it_be("package_dependency_link_for_#{dependency_type}") { create(:packages_dependency_link, package: package1, dependency_type: dependency_type) }
end
end
end
context 'for packages with dependencies' do
::Packages::DependencyLink.dependency_types.keys.each do |dependency_type|
let_it_be("package_dependency_link_for_#{dependency_type}") { create(:packages_dependency_link, package: package1, dependency_type: dependency_type) }
if params[:has_metadatum]
let_it_be('package_metadatadum') { create(:npm_metadatum, package: package1, package_json: package_json) }
end
it { is_expected.to be_a(Hash) }
it { expect(subject[package1.version].with_indifferent_access).to match_schema('public_api/v4/packages/npm_package_version') }
it { expect(subject[package2.version].with_indifferent_access).to match_schema('public_api/v4/packages/npm_package_version') }
::Packages::DependencyLink.dependency_types.keys.each do |dependency_type|
it { expect(subject.dig(package1.version, dependency_type.to_s)).to be_any }
it { expect(subject[package1.version]['custom_field']).to be_blank }
context 'dependencies' do
::Packages::DependencyLink.dependency_types.keys.each do |dependency_type|
if params[:has_dependencies]
it { expect(subject.dig(package1.version, dependency_type.to_s)).to be_any }
else
it { expect(subject.dig(package1.version, dependency_type)).to be nil }
end
it { expect(subject.dig(package2.version, dependency_type)).to be nil }
end
end
context 'metadatum' do
::Packages::Npm::PackagePresenter::PACKAGE_JSON_ALLOWED_FIELDS.each do |metadata_field|
if params[:has_metadatum] && params[:include_metadata]
it { expect(subject.dig(package1.version, metadata_field)).not_to be nil }
else
it { expect(subject.dig(package1.version, metadata_field)).to be nil }
end
it { expect(subject.dig(package2.version, metadata_field)).to be nil }
end
end
it 'avoids N+1 database queries' do
check_n_plus_one(:versions) do
create_list(:npm_package, 5, project: project, name: package_name).each do |npm_package|
::Packages::DependencyLink.dependency_types.keys.each do |dependency_type|
create(:packages_dependency_link, package: npm_package, dependency_type: dependency_type)
if has_dependencies
::Packages::DependencyLink.dependency_types.keys.each do |dependency_type|
create(:packages_dependency_link, package: npm_package, dependency_type: dependency_type)
end
end
end
end
......
......@@ -180,6 +180,7 @@ RSpec.describe API::NpmProjectPackages do
.to change { project.packages.count }.by(1)
.and change { Packages::PackageFile.count }.by(1)
.and change { Packages::Tag.count }.by(1)
.and change { Packages::Npm::Metadatum.count }.by(1)
expect(response).to have_gitlab_http_status(:ok)
end
......@@ -317,6 +318,25 @@ RSpec.describe API::NpmProjectPackages do
end
end
end
context 'with a too large metadata structure' do
let(:package_name) { "@#{group.path}/my_package_name" }
let(:params) do
upload_params(package_name: package_name, package_version: '1.2.3').tap do |h|
h['versions']['1.2.3']['test'] = 'test' * 10000
end
end
it_behaves_like 'not a package tracking event'
it 'returns an error' do
expect { upload_package_with_token }
.not_to change { project.packages.count }
expect(response).to have_gitlab_http_status(:bad_request)
expect(response.body).to include('Validation failed: Package json structure is too large')
end
end
end
def upload_package(package_name, params = {})
......
......@@ -16,6 +16,7 @@ RSpec.describe Packages::Npm::CreatePackageService do
let(:override) { {} }
let(:package_name) { "@#{namespace.path}/my-app" }
let(:version_data) { params.dig('versions', '1.0.1') }
subject { described_class.new(project, user, params).execute }
......@@ -25,6 +26,7 @@ RSpec.describe Packages::Npm::CreatePackageService do
.to change { Packages::Package.count }.by(1)
.and change { Packages::Package.npm.count }.by(1)
.and change { Packages::Tag.count }.by(1)
.and change { Packages::Npm::Metadatum.count }.by(1)
end
it_behaves_like 'assigns the package creator' do
......@@ -40,6 +42,8 @@ RSpec.describe Packages::Npm::CreatePackageService do
expect(package.version).to eq(version)
end
it { expect(subject.npm_metadatum.package_json).to eq(version_data) }
it { expect(subject.name).to eq(package_name) }
it { expect(subject.version).to eq(version) }
......@@ -54,6 +58,31 @@ RSpec.describe Packages::Npm::CreatePackageService do
expect { subject }.to change { Packages::PackageFileBuildInfo.count }.by(1)
end
end
context 'with a too large metadata structure' do
before do
params[:versions][version][:test] = 'test' * 10000
end
it 'does not create the package' do
expect { subject }.to raise_error(ActiveRecord::RecordInvalid, 'Validation failed: Package json structure is too large')
.and not_change { Packages::Package.count }
.and not_change { Packages::Package.npm.count }
.and not_change { Packages::Tag.count }
.and not_change { Packages::Npm::Metadatum.count }
end
end
context 'with packages_npm_abbreviated_metadata disabled' do
before do
stub_feature_flags(packages_npm_abbreviated_metadata: false)
end
it 'creates a package without metadatum' do
expect { subject }
.not_to change { Packages::Npm::Metadatum.count }
end
end
end
describe '#execute' do
......
......@@ -8,6 +8,8 @@ RSpec.shared_examples 'handling get metadata requests' do |scope: :project|
let_it_be(:package_dependency_link3) { create(:packages_dependency_link, package: package, dependency_type: :bundleDependencies) }
let_it_be(:package_dependency_link4) { create(:packages_dependency_link, package: package, dependency_type: :peerDependencies) }
let_it_be(:package_metadatum) { create(:npm_metadatum, package: package) }
let(:headers) { {} }
subject { get(url, headers: headers) }
......@@ -39,6 +41,19 @@ RSpec.shared_examples 'handling get metadata requests' do |scope: :project|
# query count can slightly change between the examples so we're using a custom threshold
expect { get(url, headers: headers) }.not_to exceed_query_limit(control).with_threshold(4)
end
context 'with packages_npm_abbreviated_metadata disabled' do
before do
stub_feature_flags(packages_npm_abbreviated_metadata: false)
end
it 'calls the presenter without including metadata' do
expect(::Packages::Npm::PackagePresenter)
.to receive(:new).with(anything, anything, include_metadata: false).and_call_original
subject
end
end
end
shared_examples 'reject metadata request' do |status:|
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment