cmd: defer stating model info until necessary (#5248)
This commit changes the 'ollama run' command to defer fetching model information until it really needs it. That is, when in interactive mode. It also removes one such case where the model information is fetch in duplicate, just before calling generateInteractive and then again, first thing, in generateInteractive. This positively impacts the performance of the command: ; time ./before run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./before run llama3 'hi' 0.02s user 0.01s system 2% cpu 1.168 total ; time ./before run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./before run llama3 'hi' 0.02s user 0.01s system 2% cpu 1.220 total ; time ./before run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./before run llama3 'hi' 0.02s user 0.01s system 2% cpu 1.217 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.02s user 0.01s system 4% cpu 0.652 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.01s user 0.01s system 5% cpu 0.498 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with or would you like to chat? ./after run llama3 'hi' 0.01s user 0.01s system 3% cpu 0.479 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.02s user 0.01s system 5% cpu 0.507 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.02s user 0.01s system 5% cpu 0.507 total
This commit is contained in:
parent
ccef9431c8
commit
2aa91a937b
2 changed files with 46 additions and 70 deletions
65
cmd/cmd.go
65
cmd/cmd.go
|
@ -287,38 +287,12 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
|
||||||
}
|
}
|
||||||
|
|
||||||
func RunHandler(cmd *cobra.Command, args []string) error {
|
func RunHandler(cmd *cobra.Command, args []string) error {
|
||||||
client, err := api.ClientFromEnvironment()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
name := args[0]
|
|
||||||
|
|
||||||
// check if the model exists on the server
|
|
||||||
show, err := client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
|
||||||
var statusError api.StatusError
|
|
||||||
switch {
|
|
||||||
case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
|
|
||||||
if err := PullHandler(cmd, []string{name}); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
show, err = client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
case err != nil:
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
interactive := true
|
interactive := true
|
||||||
|
|
||||||
opts := runOptions{
|
opts := runOptions{
|
||||||
Model: args[0],
|
Model: args[0],
|
||||||
WordWrap: os.Getenv("TERM") == "xterm-256color",
|
WordWrap: os.Getenv("TERM") == "xterm-256color",
|
||||||
Options: map[string]interface{}{},
|
Options: map[string]interface{}{},
|
||||||
MultiModal: slices.Contains(show.Details.Families, "clip"),
|
|
||||||
ParentModel: show.Details.ParentModel,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
format, err := cmd.Flags().GetString("format")
|
format, err := cmd.Flags().GetString("format")
|
||||||
|
@ -362,11 +336,38 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
||||||
}
|
}
|
||||||
opts.WordWrap = !nowrap
|
opts.WordWrap = !nowrap
|
||||||
|
|
||||||
if !interactive {
|
// Fill out the rest of the options based on information about the
|
||||||
return generate(cmd, opts)
|
// model.
|
||||||
|
client, err := api.ClientFromEnvironment()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return generateInteractive(cmd, opts)
|
name := args[0]
|
||||||
|
info, err := func() (*api.ShowResponse, error) {
|
||||||
|
showReq := &api.ShowRequest{Name: name}
|
||||||
|
info, err := client.Show(cmd.Context(), showReq)
|
||||||
|
var se api.StatusError
|
||||||
|
if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
|
||||||
|
if err := PullHandler(cmd, []string{name}); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||||
|
}
|
||||||
|
return info, err
|
||||||
|
}()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
opts.MultiModal = slices.Contains(info.Details.Families, "clip")
|
||||||
|
opts.ParentModel = info.Details.ParentModel
|
||||||
|
opts.Messages = append(opts.Messages, info.Messages...)
|
||||||
|
|
||||||
|
if interactive {
|
||||||
|
return generateInteractive(cmd, opts)
|
||||||
|
}
|
||||||
|
return generate(cmd, opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
func errFromUnknownKey(unknownKeyErr error) error {
|
func errFromUnknownKey(unknownKeyErr error) error {
|
||||||
|
|
|
@ -31,65 +31,40 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
func loadModel(cmd *cobra.Command, opts *runOptions) error {
|
func loadModel(cmd *cobra.Command, opts *runOptions) error {
|
||||||
client, err := api.ClientFromEnvironment()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
p := progress.NewProgress(os.Stderr)
|
p := progress.NewProgress(os.Stderr)
|
||||||
defer p.StopAndClear()
|
defer p.StopAndClear()
|
||||||
|
|
||||||
spinner := progress.NewSpinner("")
|
spinner := progress.NewSpinner("")
|
||||||
p.Add("", spinner)
|
p.Add("", spinner)
|
||||||
|
|
||||||
showReq := api.ShowRequest{Name: opts.Model}
|
client, err := api.ClientFromEnvironment()
|
||||||
showResp, err := client.Show(cmd.Context(), &showReq)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
opts.MultiModal = slices.Contains(showResp.Details.Families, "clip")
|
|
||||||
opts.ParentModel = showResp.Details.ParentModel
|
|
||||||
|
|
||||||
if len(showResp.Messages) > 0 {
|
|
||||||
opts.Messages = append(opts.Messages, showResp.Messages...)
|
|
||||||
}
|
|
||||||
|
|
||||||
chatReq := &api.ChatRequest{
|
chatReq := &api.ChatRequest{
|
||||||
Model: opts.Model,
|
Model: opts.Model,
|
||||||
Messages: []api.Message{},
|
KeepAlive: opts.KeepAlive,
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.KeepAlive != nil {
|
return client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
|
||||||
chatReq.KeepAlive = opts.KeepAlive
|
|
||||||
}
|
|
||||||
|
|
||||||
err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
|
|
||||||
p.StopAndClear()
|
p.StopAndClear()
|
||||||
if len(opts.Messages) > 0 {
|
for _, msg := range opts.Messages {
|
||||||
for _, msg := range opts.Messages {
|
switch msg.Role {
|
||||||
switch msg.Role {
|
case "user":
|
||||||
case "user":
|
fmt.Printf(">>> %s\n", msg.Content)
|
||||||
fmt.Printf(">>> %s\n", msg.Content)
|
case "assistant":
|
||||||
case "assistant":
|
state := &displayResponseState{}
|
||||||
state := &displayResponseState{}
|
displayResponse(msg.Content, opts.WordWrap, state)
|
||||||
displayResponse(msg.Content, opts.WordWrap, state)
|
fmt.Println()
|
||||||
fmt.Println()
|
fmt.Println()
|
||||||
fmt.Println()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||||
opts.Messages = make([]api.Message, 0)
|
|
||||||
|
|
||||||
err := loadModel(cmd, &opts)
|
err := loadModel(cmd, &opts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
Loading…
Reference in a new issue